@arabold/docs-mcp-server 1.19.0 → 1.21.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.js CHANGED
@@ -1,9 +1,15 @@
1
1
  import "dotenv/config";
2
- import { Option, Command } from "commander";
2
+ import { PostHog } from "posthog-node";
3
+ import crypto, { randomUUID } from "node:crypto";
4
+ import fs, { readFileSync, existsSync } from "node:fs";
3
5
  import path from "node:path";
6
+ import envPaths from "env-paths";
7
+ import { Option, Command } from "commander";
4
8
  import formBody from "@fastify/formbody";
5
9
  import fastifyStatic from "@fastify/static";
6
10
  import Fastify from "fastify";
11
+ import { ProxyOAuthServerProvider } from "@modelcontextprotocol/sdk/server/auth/providers/proxyProvider.js";
12
+ import { createRemoteJWKSet, jwtVerify } from "jose";
7
13
  import { SSEServerTransport } from "@modelcontextprotocol/sdk/server/sse.js";
8
14
  import { StreamableHTTPServerTransport } from "@modelcontextprotocol/sdk/server/streamableHttp.js";
9
15
  import { McpServer, ResourceTemplate } from "@modelcontextprotocol/sdk/server/mcp.js";
@@ -17,7 +23,7 @@ import { chromium } from "playwright";
17
23
  import { gfm } from "@joplin/turndown-plugin-gfm";
18
24
  import TurndownService from "turndown";
19
25
  import iconv from "iconv-lite";
20
- import fs from "node:fs/promises";
26
+ import fs$1 from "node:fs/promises";
21
27
  import * as mime from "mime-types";
22
28
  import axios from "axios";
23
29
  import { HeaderGenerator } from "header-generator";
@@ -25,7 +31,6 @@ import { initTRPC } from "@trpc/server";
25
31
  import { fastifyTRPCPlugin } from "@trpc/server/adapters/fastify";
26
32
  import { z as z$1 } from "zod";
27
33
  import { jsxs, jsx, Fragment } from "@kitajs/html/jsx-runtime";
28
- import fs$1, { readFileSync, existsSync } from "node:fs";
29
34
  import { unified } from "unified";
30
35
  import remarkParse from "remark-parse";
31
36
  import remarkGfm from "remark-gfm";
@@ -34,7 +39,6 @@ import DOMPurify from "dompurify";
34
39
  import { fileURLToPath, URL as URL$1 } from "node:url";
35
40
  import { StdioServerTransport } from "@modelcontextprotocol/sdk/server/stdio.js";
36
41
  import { createTRPCProxyClient, httpBatchLink } from "@trpc/client";
37
- import "env-paths";
38
42
  import "fuse.js";
39
43
  import "langchain/text_splitter";
40
44
  import "better-sqlite3";
@@ -101,10 +105,806 @@ const logger = {
101
105
  }
102
106
  }
103
107
  };
104
- const version = "1.18.0";
108
/**
 * Small wrapper around the PostHog SDK client.
 *
 * When constructed with a truthy `enabled` flag it creates a PostHog client
 * with the host, batching and geo-IP options from `CONFIG`; the remaining
 * `CONFIG` entries are declared but not passed to the SDK by this class.
 * Every SDK call is guarded so a telemetry failure is only logged at debug
 * level and never propagates to the caller.
 */
class PostHogClient {
  client;
  enabled;
  // PostHog configuration
  static CONFIG = {
    host: "https://app.posthog.com",
    // Performance optimizations
    flushAt: 20, // Batch size - send after 20 events
    flushInterval: 1e4, // 10 seconds - send after time
    // Privacy settings
    disableGeoip: true, // Don't collect IP geolocation
    disableSessionRecording: true, // Never record sessions
    disableSurveys: true, // No user surveys
    // Data handling
    persistence: "memory" // No disk persistence for privacy
  };

  /**
   * @param {boolean} enabled - Whether events should be sent at all.
   */
  constructor(enabled) {
    this.enabled = enabled;
    if (!this.enabled) {
      // Normalize any falsy input to a strict `false`.
      this.enabled = false;
      logger.debug("PostHog client disabled");
      return;
    }
    try {
      const { host, flushAt, flushInterval, disableGeoip } = PostHogClient.CONFIG;
      this.client = new PostHog("phc_g7pXZZdUiAQXdnwUANjloQWMvO0amEDTBaeDSWgXgrQ", {
        host,
        flushAt,
        flushInterval,
        disableGeoip
      });
      logger.debug("PostHog client initialized");
    } catch (error) {
      const reason = error instanceof Error ? error.message : "Unknown error";
      logger.debug(`PostHog initialization failed: ${reason}`);
      // A client that failed to construct behaves like a disabled one.
      this.enabled = false;
    }
  }

  /**
   * Send event to PostHog. Does nothing when disabled or when no client exists.
   *
   * @param {string} distinctId - Identifier the event is attributed to.
   * @param {string} event - Event name.
   * @param {object} properties - Event properties.
   */
  capture(distinctId, event, properties) {
    if (!this.enabled || !this.client) {
      return;
    }
    try {
      this.client.capture({ distinctId, event, properties });
      logger.debug(`PostHog event captured: ${event}`);
    } catch (error) {
      const reason = error instanceof Error ? error.message : "Unknown error";
      logger.debug(`PostHog capture error: ${reason}`);
    }
  }

  /**
   * Graceful shutdown: awaits the SDK client's own shutdown() when a client exists.
   *
   * @returns {Promise<void>}
   */
  async shutdown() {
    if (!this.client) {
      return;
    }
    try {
      await this.client.shutdown();
      logger.debug("PostHog client shutdown complete");
    } catch (error) {
      const reason = error instanceof Error ? error.message : "Unknown error";
      logger.debug(`PostHog shutdown error: ${reason}`);
    }
  }

  /**
   * Check if client is enabled and ready.
   */
  isEnabled() {
    return this.enabled && Boolean(this.client);
  }
}
192
/**
 * Holds the context object of the current session (if any) and merges it
 * into event properties.
 */
class SessionTracker {
  sessionContext;

  /**
   * Begin a session by remembering its context object.
   *
   * @param {object} context - Session context; endSession() reads its
   *   `startTime` (a Date) and `interface` fields.
   */
  startSession(context) {
    this.sessionContext = context;
  }

  /**
   * Close the current session.
   *
   * @returns {{duration: number, interface: *}|null} Elapsed milliseconds
   *   since `startTime` plus the session's interface, or null when no
   *   session is active.
   */
  endSession() {
    const active = this.sessionContext;
    if (!active) {
      return null;
    }
    // Build the summary before clearing so a malformed context leaves state untouched.
    const summary = {
      duration: Date.now() - active.startTime.getTime(),
      interface: active.interface
    };
    this.sessionContext = void 0;
    return summary;
  }

  /**
   * @returns {object|undefined} The context of the active session, if any.
   */
  getSessionContext() {
    return this.sessionContext;
  }

  /**
   * Merge the session context, the given properties (which win on key
   * clashes) and a fresh ISO `timestamp` (which always wins).
   *
   * @param {object} [properties={}] - Event-specific properties.
   * @returns {object} A new merged object.
   */
  getEnrichedProperties(properties = {}) {
    const timestamp = (/* @__PURE__ */ new Date()).toISOString();
    return Object.assign({}, this.sessionContext, properties, { timestamp });
  }
}
227
/**
 * Lazily created, process-wide holder of the telemetry on/off switch.
 */
class TelemetryConfig {
  static instance;
  enabled;

  constructor() {
    this.enabled = this.determineEnabledState();
  }

  /**
   * Telemetry defaults to enabled. It is turned off when the environment
   * variable DOCS_MCP_TELEMETRY is exactly "false" or when the process
   * arguments contain "--no-telemetry"; either one alone is sufficient.
   *
   * @returns {boolean} Initial enabled state.
   */
  determineEnabledState() {
    const disabledByEnv = process.env.DOCS_MCP_TELEMETRY === "false";
    const disabledByFlag = process.argv.includes("--no-telemetry");
    return !(disabledByEnv || disabledByFlag);
  }

  /** @returns {boolean} Current enabled state. */
  isEnabled() {
    return this.enabled;
  }

  /** Switch telemetry off for this instance. */
  disable() {
    this.enabled = false;
  }

  /** Switch telemetry on for this instance. */
  enable() {
    this.enabled = true;
  }

  /**
   * @returns {TelemetryConfig} The shared instance, created on first use.
   */
  static getInstance() {
    TelemetryConfig.instance ??= new TelemetryConfig();
    return TelemetryConfig.instance;
  }
}
263
/**
 * Return the installation identifier stored in `installation.id` under the
 * data directory, creating and persisting a new random UUID when no
 * non-empty identifier is stored yet.
 *
 * The data directory is DOCS_MCP_STORE_PATH when set, otherwise the
 * env-paths data directory for "docs-mcp-server". Any failure (unreadable
 * or unwritable location, etc.) falls back to a fresh UUID that is not
 * persisted.
 *
 * @returns {string} Installation identifier.
 */
function generateInstallationId() {
  try {
    const dataDir = process.env.DOCS_MCP_STORE_PATH || envPaths("docs-mcp-server", { suffix: "" }).data;
    const idFile = path.join(dataDir, "installation.id");
    const storedId = fs.existsSync(idFile) ? fs.readFileSync(idFile, "utf8").trim() : "";
    if (storedId) {
      return storedId;
    }
    const freshId = randomUUID();
    fs.mkdirSync(dataDir, { recursive: true });
    fs.writeFileSync(idFile, freshId, "utf8");
    return freshId;
  } catch {
    // Best effort only: telemetry identity must never block startup.
    return randomUUID();
  }
}
282
/**
 * Report whether the shared TelemetryConfig currently has telemetry enabled.
 *
 * @returns {boolean}
 */
function shouldEnableTelemetry() {
  const telemetryConfig = TelemetryConfig.getInstance();
  return telemetryConfig.isEnabled();
}
285
// Names of the telemetry events, keyed by their constant-style identifier.
var TelemetryEvent = /* @__PURE__ */ ((registry) =>
  Object.assign(registry, {
    SESSION_STARTED: "session_started",
    SESSION_ENDED: "session_ended",
    APP_STARTED: "app_started",
    APP_SHUTDOWN: "app_shutdown",
    COMMAND_EXECUTED: "command_executed",
    TOOL_USED: "tool_used",
    HTTP_REQUEST_COMPLETED: "http_request_completed",
    PIPELINE_JOB_PROGRESS: "pipeline_job_progress",
    PIPELINE_JOB_COMPLETED: "pipeline_job_completed",
    DOCUMENT_PROCESSED: "document_processed",
    DOCUMENT_PROCESSING_FAILED: "document_processing_failed",
    ERROR_OCCURRED: "error_occurred"
  }))(TelemetryEvent || {});
300
/**
 * Telemetry facade: combines a PostHogClient (delivery), a SessionTracker
 * (session context) and an installation id (the distinct id on every event).
 * All tracking methods are no-ops when analytics is disabled.
 */
class Analytics {
  postHogClient;
  sessionTracker;
  enabled = true;
  distinctId;

  /**
   * @param {boolean} [enabled] - Explicit on/off switch. When null or
   *   undefined, the shared TelemetryConfig decides.
   */
  constructor(enabled) {
    this.enabled = enabled ?? TelemetryConfig.getInstance().isEnabled();
    this.distinctId = generateInstallationId();
    this.postHogClient = new PostHogClient(this.enabled);
    this.sessionTracker = new SessionTracker();
    if (this.enabled) {
      logger.debug("Analytics enabled");
    } else {
      logger.debug("Analytics disabled");
    }
  }

  /**
   * Initialize session context - call once per session.
   * Stores the context and emits a "session_started" event.
   *
   * @param {object} context - Session context (interface, version,
   *   platform, authEnabled, readOnly, servicesEnabled, ...).
   */
  startSession(context) {
    if (!this.enabled) return;
    this.sessionTracker.startSession(context);
    this.track("session_started", {
      interface: context.interface,
      version: context.version,
      platform: context.platform,
      sessionDurationTarget: context.interface === "cli" ? "short" : "long",
      authEnabled: context.authEnabled,
      readOnly: context.readOnly,
      // A context without a services list must not make telemetry throw
      // into the host application; report zero services instead.
      servicesCount: context.servicesEnabled?.length ?? 0
    });
  }

  /**
   * Track an event with automatic session context inclusion.
   *
   * @param {string} event - Event name.
   * @param {object} [properties={}] - Event-specific properties.
   */
  track(event, properties = {}) {
    if (!this.enabled) return;
    const eventProperties = this.sessionTracker.getEnrichedProperties(properties);
    this.postHogClient.capture(this.distinctId, event, eventProperties);
  }

  /**
   * End the tracked session and, when one was active, emit "session_ended"
   * with its duration in milliseconds and its interface.
   */
  endSession() {
    if (!this.enabled) return;
    const sessionInfo = this.sessionTracker.endSession();
    if (sessionInfo) {
      this.track("session_ended", {
        durationMs: sessionInfo.duration,
        interface: sessionInfo.interface
      });
    }
  }

  /**
   * Graceful shutdown; delegates to the PostHog client's shutdown().
   *
   * @returns {Promise<void>}
   */
  async shutdown() {
    await this.postHogClient.shutdown();
  }

  /**
   * Check if analytics is enabled and the underlying client is usable.
   */
  isEnabled() {
    return this.enabled && this.postHogClient.isEnabled();
  }

  /**
   * Get current session context.
   */
  getSessionContext() {
    return this.sessionTracker.getSessionContext();
  }
}
372
// Module-level Analytics instance used by trackTool and the telemetry service.
// Constructed without an argument, so enablement comes from TelemetryConfig.
const analytics = new Analytics();
373
/**
 * Run a tool operation and record a "tool_used" telemetry event for it.
 *
 * The operation's outcome is what the caller sees. A failure inside the
 * optional `getProperties` extractor is logged at debug level and never
 * turns a successful operation into a rejected one, nor causes it to be
 * recorded as failed.
 *
 * @param {string} toolName - Name reported in the event's `tool` property.
 * @param {() => Promise<*>} operation - The tool work to execute.
 * @param {(result: *) => object} [getProperties] - Derives extra event
 *   properties from the successful result.
 * @returns {Promise<*>} The operation's result.
 * @throws Whatever `operation` throws, after a `success: false` event
 *   carrying the error's constructor name has been recorded.
 */
async function trackTool(toolName, operation, getProperties) {
  const startTime = Date.now();
  let result;
  try {
    result = await operation();
  } catch (error) {
    analytics.track("tool_used", {
      tool: toolName,
      success: false,
      durationMs: Date.now() - startTime,
      errorType: error instanceof Error ? error.constructor.name : "UnknownError"
    });
    throw error;
  }
  // Property extraction is telemetry-only work: keep it outside the
  // operation's try block so its failures cannot affect the tool result.
  let extraProperties = {};
  if (getProperties) {
    try {
      extraProperties = getProperties(result);
    } catch (error) {
      logger.debug(
        `Failed to derive telemetry properties for tool ${toolName}: ${error instanceof Error ? error.message : "Unknown error"}`
      );
    }
  }
  analytics.track("tool_used", {
    tool: toolName,
    success: true,
    durationMs: Date.now() - startTime,
    ...extraProperties
  });
  return result;
}
394
/**
 * Return the hostname component of an absolute URL.
 *
 * @param {string} url - Candidate URL.
 * @returns {string} The parsed hostname, or "invalid-hostname" when the
 *   input cannot be parsed as a URL.
 */
function extractHostname(url) {
  try {
    const { hostname } = new URL(url);
    return hostname;
  } catch {
    // Not parseable as an absolute URL.
    return "invalid-hostname";
  }
}
402
/**
 * Classify a URL or filesystem path by protocol.
 *
 * @param {string} urlOrPath - Absolute URL, POSIX path or Windows drive path.
 * @returns {string} The URL scheme without the trailing colon (for example
 *   "https"), "file" for absolute POSIX paths and Windows drive paths, or
 *   "unknown" for anything else.
 */
function extractProtocol(urlOrPath) {
  // A Windows drive path such as "C:\docs" or "C:/docs" is accepted by the
  // URL parser as a URL with the one-letter scheme "c", so it has to be
  // recognised before parsing rather than in the parse-failure branch.
  if (/^[A-Za-z]:/.test(urlOrPath)) {
    return "file";
  }
  try {
    const parsed = new URL(urlOrPath);
    return parsed.protocol.replace(":", "");
  } catch {
    if (urlOrPath.startsWith("/")) {
      return "file";
    }
    return "unknown";
  }
}
413
/**
 * Derive content-free statistics about a search query, so the query text
 * itself does not need to be reported.
 *
 * @param {string} query - Raw search query.
 * @returns {{length: number, wordCount: number, hasCodeTerms: boolean, hasSpecialChars: boolean}}
 *   `length` is the raw string length; `wordCount` counts whitespace
 *   separated words (0 for an empty or whitespace-only query);
 *   `hasCodeTerms` flags common programming keywords; `hasSpecialChars`
 *   flags any character that is neither a word character nor whitespace.
 */
function analyzeSearchQuery(query) {
  const trimmed = query.trim();
  return {
    length: query.length,
    // "".split(/\s+/) yields [""], which would count an empty query as one word.
    wordCount: trimmed === "" ? 0 : trimmed.split(/\s+/).length,
    hasCodeTerms: /\b(function|class|import|export|const|let|var|def|async|await)\b/i.test(query),
    hasSpecialChars: /[^\w\s]/.test(query)
  };
}
421
/**
 * Redact URLs, file paths and credential-looking fragments from an error
 * message, then cap it at 200 characters.
 *
 * Redactions are applied in the listed order; later patterns operate on the
 * output of earlier ones.
 *
 * @param {string} message - Raw error message.
 * @returns {string} Redacted, truncated message.
 */
function sanitizeErrorMessage(message) {
  const redactions = [
    [/https?:\/\/[^\s]+/gi, "[url]"],
    [/file:\/\/[^\s]+/gi, "[file-url]"],
    [/\/[^\s]*\.[a-z]{2,4}/gi, "[path]"],
    [/[A-Za-z]:\\[^\s]+/g, "[path]"],
    [/Bearer\s+[^\s]+/gi, "Bearer [token]"],
    [/api[_-]?key[=:]\s*[^\s]+/gi, "api_key=[redacted]"],
    [/token[=:]\s*[^\s]+/gi, "token=[redacted]"]
  ];
  let sanitized = message;
  for (const [pattern, replacement] of redactions) {
    sanitized = sanitized.replace(pattern, replacement);
  }
  return sanitized.substring(0, 200);
}
424
/**
 * Reduce an error to a small, redacted summary.
 *
 * @param {Error} error - Error to summarize.
 * @returns {{type: string, message: string, hasStack: boolean}} The error's
 *   constructor name, its message passed through sanitizeErrorMessage, and
 *   whether a stack is present (the stack itself is not included).
 */
function sanitizeError(error) {
  const type = error.constructor.name;
  const message = sanitizeErrorMessage(error.message);
  const hasStack = Boolean(error.stack);
  return { type, message, hasStack };
}
431
/**
 * Keep only the flag-like entries of an argument list.
 *
 * @param {string[]} argv - Command-line arguments.
 * @returns {string[]} Entries starting with "-" (which includes "--"
 *   prefixed ones), in their original order.
 */
function extractCliFlags(argv) {
  const isFlag = (arg) => arg.startsWith("-");
  return argv.filter(isFlag);
}
434
// Version string carried by this build, exposed through `packageJson`.
const version = "1.20.0";
const packageJson = { version };

/**
 * @returns {string} The `version` currently recorded on `packageJson`.
 */
function getPackageVersion() {
  const { version: recordedVersion } = packageJson;
  return recordedVersion;
}
441
/**
 * Build the session context for a CLI invocation.
 *
 * @param {string} [command] - CLI command name; "unknown" when falsy.
 * @param {{authEnabled?: boolean, readOnly?: boolean}} [options] - Optional
 *   flags copied onto the context as-is (left undefined when absent).
 * @returns {object} Context with a fresh session id, interface "cli", the
 *   current time, package version, platform and Node version, and
 *   `servicesEnabled` fixed to ["worker"].
 */
function createCliSession(command, options) {
  const { authEnabled, readOnly } = options ?? {};
  const session = {
    sessionId: randomUUID(),
    interface: "cli",
    startTime: /* @__PURE__ */ new Date(),
    version: getPackageVersion(),
    platform: process.platform,
    nodeVersion: process.version,
    command: command || "unknown",
    authEnabled,
    readOnly,
    // CLI typically runs embedded worker
    servicesEnabled: ["worker"]
  };
  return session;
}
456
/**
 * Build the session context for an MCP server session.
 *
 * @param {object} options - Session options.
 * @param {string} [options.protocol] - Falls back to "stdio" when falsy.
 * @param {string} [options.transport] - Copied as-is.
 * @param {boolean} [options.authEnabled] - Falls back to false when nullish.
 * @param {boolean} [options.readOnly] - Falls back to false when nullish.
 * @param {string[]} [options.servicesEnabled] - Falls back to ["mcp"] when nullish.
 * @returns {object} Context with a fresh session id, interface "mcp", the
 *   current time, package version, platform and Node version.
 */
function createMcpSession(options) {
  const { protocol, transport, authEnabled, readOnly, servicesEnabled } = options;
  const session = {
    sessionId: randomUUID(),
    interface: "mcp",
    startTime: /* @__PURE__ */ new Date(),
    version: getPackageVersion(),
    platform: process.platform,
    nodeVersion: process.version,
    protocol: protocol || "stdio",
    transport,
    authEnabled: authEnabled ?? false,
    readOnly: readOnly ?? false,
    servicesEnabled: servicesEnabled ?? ["mcp"]
  };
  return session;
}
471
/**
 * Create a narrow telemetry facade over the module-level `analytics`
 * instance. Each method forwards to the same-named analytics method and
 * discards its return value.
 *
 * @returns {{startSession: (context: object) => void, endSession: () => void, shutdown: () => Promise<void>}}
 */
function createTelemetryService() {
  const startSession = (context) => {
    analytics.startSession(context);
  };
  const endSession = () => {
    analytics.endSession();
  };
  const shutdown = async () => {
    await analytics.shutdown();
  };
  return { startSession, endSession, shutdown };
}
// Shared telemetry service instance for this module.
const telemetryService = createTelemetryService();
485
/**
 * Build a request hook that authenticates requests through `authManager`.
 *
 * The hook always attaches the auth context to `request.auth`. When
 * authentication is enabled and the request is not authenticated it answers
 * 401 with a `WWW-Authenticate: Bearer ...` challenge: "invalid_token" if an
 * Authorization header was sent, "unauthorized" if it was missing. Any
 * exception raised along the way is also answered with a 401
 * "invalid_token".
 *
 * @param {object} authManager - Exposes `createAuthContext(header, request)`
 *   and `authConfig.enabled`.
 * @returns {(request: object, reply: object) => Promise<void>}
 */
function createAuthMiddleware(authManager) {
  // Send a 401 with the given Bearer challenge and OAuth-style error body.
  const deny = (reply, challenge, error, description) => {
    reply.status(401).header("WWW-Authenticate", challenge).send({
      error,
      error_description: description
    });
  };
  return async (request, reply) => {
    try {
      const authorization = request.headers.authorization;
      const authContext = await authManager.createAuthContext(authorization || "", request);
      request.auth = authContext;
      if (!authManager.authConfig.enabled) {
        logger.debug("Authentication disabled, allowing request");
        return;
      }
      if (authContext.authenticated) {
        logger.debug(
          `Authentication successful for subject: ${authContext.subject || "anonymous"}`
        );
        return;
      }
      if (authorization) {
        logger.debug("Token validation failed");
        deny(
          reply,
          'Bearer realm="MCP Server", error="invalid_token"',
          "invalid_token",
          "The access token is invalid"
        );
        return;
      }
      logger.debug("Missing authorization header");
      deny(reply, 'Bearer realm="MCP Server"', "unauthorized", "Authorization header required");
    } catch (error) {
      const message = error instanceof Error ? error.message : "Authentication failed";
      logger.debug(`Authentication error: ${message}`);
      deny(
        reply,
        'Bearer realm="MCP Server", error="invalid_token"',
        "invalid_token",
        "Token validation failed"
      );
    }
  };
}
532
/**
 * OAuth2 proxy authentication manager.
 *
 * Discovers the configured issuer's endpoints, exposes proxy OAuth2 routes
 * on a Fastify server, and validates bearer tokens: first as JWTs against
 * the issuer's JWKS, then (as a fallback) by calling the issuer's userinfo
 * endpoint.
 *
 * Token material is never written to the logs, and the proxy routes reuse
 * the endpoints discovered by initialize() instead of re-running remote
 * discovery on every request.
 */
class ProxyAuthManager {
  proxyProvider = null;
  discoveredEndpoints = null;
  jwks = null;

  /**
   * @param {object} config - Auth configuration; this class reads
   *   `enabled`, `issuerUrl` and `audience`.
   */
  constructor(config) {
    this.config = config;
  }

  /**
   * Get the authentication configuration
   */
  get authConfig() {
    return this.config;
  }

  /**
   * Initialize the proxy auth manager with the configured OAuth provider.
   * Does nothing when authentication is disabled.
   *
   * @throws {Error} When auth is enabled but issuer URL or audience is
   *   missing, or when endpoint discovery / provider setup fails.
   */
  async initialize() {
    if (!this.config.enabled) {
      logger.debug("Authentication disabled, skipping proxy auth manager initialization");
      return;
    }
    if (!this.config.issuerUrl || !this.config.audience) {
      throw new Error("Issuer URL and Audience are required when auth is enabled");
    }
    try {
      logger.info("🔐 Initializing OAuth2 proxy authentication...");
      this.discoveredEndpoints = await this.discoverEndpoints();
      if (this.discoveredEndpoints.jwksUri) {
        this.jwks = createRemoteJWKSet(new URL(this.discoveredEndpoints.jwksUri));
        logger.debug(`JWKS configured from: ${this.discoveredEndpoints.jwksUri}`);
      }
      const capabilities = [];
      if (this.discoveredEndpoints.jwksUri) capabilities.push("JWT validation via JWKS");
      if (this.discoveredEndpoints.userinfoUrl)
        capabilities.push("opaque token validation via userinfo");
      logger.debug(`Token validation capabilities: ${capabilities.join(", ")}`);
      if (capabilities.length === 0) {
        logger.warn(
          "⚠️  No token validation mechanisms available - authentication may fail"
        );
      }
      this.proxyProvider = new ProxyOAuthServerProvider({
        endpoints: {
          authorizationUrl: this.discoveredEndpoints.authorizationUrl,
          tokenUrl: this.discoveredEndpoints.tokenUrl,
          revocationUrl: this.discoveredEndpoints.revocationUrl,
          registrationUrl: this.discoveredEndpoints.registrationUrl
        },
        verifyAccessToken: this.verifyAccessToken.bind(this),
        getClient: this.getClient.bind(this)
      });
      logger.info("✅ OAuth2 proxy authentication initialized successfully");
    } catch (error) {
      const message = error instanceof Error ? error.message : "Unknown error";
      logger.error(`❌ Failed to initialize OAuth2 proxy authentication: ${message}`);
      throw new Error(`Proxy authentication initialization failed: ${message}`);
    }
  }

  /**
   * Return the endpoints cached by initialize(). Remote discovery is only
   * run (without caching its result) if nothing has been cached yet.
   *
   * @returns {Promise<object>} Endpoint URLs as built by buildEndpointsFromConfig().
   */
  async resolveEndpoints() {
    return this.discoveredEndpoints ?? this.discoverEndpoints();
  }

  /**
   * Register OAuth2 endpoints on the Fastify server.
   * This manually implements the necessary OAuth2 endpoints using the proxy provider.
   *
   * @param {object} server - Fastify instance (uses `get` and `post`).
   * @param {URL} baseUrl - Public base URL; its `origin` is advertised in metadata.
   * @throws {Error} When initialize() has not set up the proxy provider.
   */
  registerRoutes(server, baseUrl) {
    if (!this.proxyProvider) {
      throw new Error("Proxy provider not initialized");
    }
    server.get("/.well-known/oauth-authorization-server", async (_request, reply) => {
      const metadata = {
        issuer: baseUrl.origin,
        authorization_endpoint: `${baseUrl.origin}/oauth/authorize`,
        token_endpoint: `${baseUrl.origin}/oauth/token`,
        revocation_endpoint: `${baseUrl.origin}/oauth/revoke`,
        registration_endpoint: `${baseUrl.origin}/oauth/register`,
        scopes_supported: ["profile", "email"],
        response_types_supported: ["code"],
        grant_types_supported: ["authorization_code", "refresh_token"],
        token_endpoint_auth_methods_supported: [
          "client_secret_basic",
          "client_secret_post",
          "none"
        ],
        code_challenge_methods_supported: ["S256"]
      };
      reply.type("application/json").send(metadata);
    });
    server.get("/.well-known/oauth-protected-resource", async (request, reply) => {
      const baseUrl2 = `${request.protocol}://${request.headers.host}`;
      const metadata = {
        resource: `${baseUrl2}/sse`,
        authorization_servers: [this.config.issuerUrl],
        scopes_supported: ["profile", "email"],
        bearer_methods_supported: ["header"],
        resource_name: "Documentation MCP Server",
        resource_documentation: "https://github.com/arabold/docs-mcp-server#readme",
        // Enhanced metadata for better discoverability
        resource_server_metadata_url: `${baseUrl2}/.well-known/oauth-protected-resource`,
        authorization_server_metadata_url: `${this.config.issuerUrl}/.well-known/openid-configuration`,
        jwks_uri: `${this.config.issuerUrl}/.well-known/jwks.json`,
        // Supported MCP transports
        mcp_transports: [
          {
            transport: "sse",
            endpoint: `${baseUrl2}/sse`,
            description: "Server-Sent Events transport"
          },
          {
            transport: "http",
            endpoint: `${baseUrl2}/mcp`,
            description: "Streaming HTTP transport"
          }
        ]
      };
      reply.type("application/json").send(metadata);
    });
    server.get("/oauth/authorize", async (request, reply) => {
      const endpoints = await this.resolveEndpoints();
      const params = new URLSearchParams(request.query);
      // Default the resource indicator to this server's SSE endpoint.
      if (!params.has("resource")) {
        const resourceUrl = `${request.protocol}://${request.headers.host}/sse`;
        params.set("resource", resourceUrl);
      }
      const redirectUrl = `${endpoints.authorizationUrl}?${params.toString()}`;
      reply.redirect(redirectUrl);
    });
    server.post("/oauth/token", async (request, reply) => {
      const endpoints = await this.resolveEndpoints();
      const tokenBody = new URLSearchParams(request.body);
      if (!tokenBody.has("resource")) {
        const resourceUrl = `${request.protocol}://${request.headers.host}/sse`;
        tokenBody.set("resource", resourceUrl);
      }
      const response = await fetch(endpoints.tokenUrl, {
        method: "POST",
        headers: {
          "Content-Type": "application/x-www-form-urlencoded"
        },
        body: tokenBody.toString()
      });
      const data = await response.json();
      reply.status(response.status).type("application/json").send(data);
    });
    server.post("/oauth/revoke", async (request, reply) => {
      const endpoints = await this.resolveEndpoints();
      if (endpoints.revocationUrl) {
        const response = await fetch(endpoints.revocationUrl, {
          method: "POST",
          headers: {
            "Content-Type": "application/x-www-form-urlencoded"
          },
          body: new URLSearchParams(request.body).toString()
        });
        reply.status(response.status).send();
      } else {
        reply.status(404).send({ error: "Revocation not supported" });
      }
    });
    server.post("/oauth/register", async (request, reply) => {
      const endpoints = await this.resolveEndpoints();
      if (endpoints.registrationUrl) {
        const response = await fetch(endpoints.registrationUrl, {
          method: "POST",
          headers: {
            "Content-Type": "application/json"
          },
          body: JSON.stringify(request.body)
        });
        const data = await response.json();
        reply.status(response.status).type("application/json").send(data);
      } else {
        reply.status(404).send({ error: "Dynamic client registration not supported" });
      }
    });
    logger.debug("OAuth2 endpoints registered on Fastify server");
  }

  /**
   * Discover OAuth endpoints from the OAuth2 authorization server.
   * Uses OAuth2 discovery (RFC 8414) with OIDC discovery fallback.
   * Supports both JWT and opaque token validation methods.
   *
   * @returns {Promise<object>} Endpoint URLs as built by buildEndpointsFromConfig().
   * @throws {Error} When OAuth2 discovery does not succeed and the OIDC
   *   document cannot be fetched with an OK status.
   */
  async discoverEndpoints() {
    const oauthDiscoveryUrl = `${this.config.issuerUrl}/.well-known/oauth-authorization-server`;
    try {
      const oauthResponse = await fetch(oauthDiscoveryUrl);
      if (oauthResponse.ok) {
        const config2 = await oauthResponse.json();
        logger.debug(
          `Successfully discovered OAuth2 endpoints from: ${oauthDiscoveryUrl}`
        );
        // The userinfo endpoint is taken from the OIDC document when available.
        const userinfoEndpoint = await this.discoverUserinfoEndpoint();
        if (userinfoEndpoint) {
          config2.userinfo_endpoint = userinfoEndpoint;
        }
        return this.buildEndpointsFromConfig(config2);
      }
    } catch (error) {
      logger.debug(`OAuth2 discovery failed: ${error}, trying OIDC discovery`);
    }
    const oidcDiscoveryUrl = `${this.config.issuerUrl}/.well-known/openid-configuration`;
    const oidcResponse = await fetch(oidcDiscoveryUrl);
    if (!oidcResponse.ok) {
      throw new Error(
        `Failed to fetch configuration from both ${oauthDiscoveryUrl} and ${oidcDiscoveryUrl}`
      );
    }
    const config = await oidcResponse.json();
    logger.debug(`Successfully discovered OIDC endpoints from: ${oidcDiscoveryUrl}`);
    return this.buildEndpointsFromConfig(config);
  }

  /**
   * Try to discover userinfo endpoint for opaque token validation
   *
   * @returns {Promise<string|null>} The endpoint URL, or null when it is
   *   not advertised or the OIDC document cannot be fetched.
   */
  async discoverUserinfoEndpoint() {
    try {
      const oidcDiscoveryUrl = `${this.config.issuerUrl}/.well-known/openid-configuration`;
      const response = await fetch(oidcDiscoveryUrl);
      if (response.ok) {
        const config = await response.json();
        return config.userinfo_endpoint || null;
      }
    } catch (error) {
      logger.debug(`Failed to fetch userinfo endpoint: ${error}`);
    }
    return null;
  }

  /**
   * Build endpoint configuration from discovery response.
   *
   * @param {object} config - Parsed discovery document.
   * @returns {object} The subset of endpoint URLs used by this class.
   */
  buildEndpointsFromConfig(config) {
    return {
      authorizationUrl: config.authorization_endpoint,
      tokenUrl: config.token_endpoint,
      revocationUrl: config.revocation_endpoint,
      registrationUrl: config.registration_endpoint,
      jwksUri: config.jwks_uri,
      userinfoUrl: config.userinfo_endpoint
    };
  }

  /**
   * Get supported resource URLs for this MCP server instance.
   * This enables self-discovering resource validation per MCP Authorization spec.
   *
   * @param {object} request - Incoming request (uses `protocol` and `headers.host`).
   * @returns {string[]} SSE, streaming HTTP and root URLs of this server.
   */
  getSupportedResources(request) {
    const baseUrl = `${request.protocol}://${request.headers.host}`;
    return [
      `${baseUrl}/sse`, // SSE transport
      `${baseUrl}/mcp`, // Streaming HTTP transport
      `${baseUrl}` // Server root
    ];
  }

  /**
   * Verify an access token using hybrid validation approach.
   * First tries JWT validation with JWKS, falls back to userinfo endpoint for opaque tokens.
   *
   * @param {string} token - Bearer token to validate.
   * @param {object} [request] - Incoming request, used only for debug logging.
   * @returns {Promise<{token: string, clientId: string, scopes: string[]}>}
   * @throws {Error} "Invalid access token" when no strategy accepts the token.
   */
  async verifyAccessToken(token, request) {
    // Never log any part of the token: debug logs must not carry credentials.
    logger.debug("Attempting to verify access token");
    if (this.jwks) {
      try {
        logger.debug("Attempting JWT validation with JWKS...");
        const { payload } = await jwtVerify(token, this.jwks, {
          issuer: this.config.issuerUrl,
          audience: this.config.audience
        });
        logger.debug(
          `JWT validation successful. Subject: ${payload.sub}, Audience: ${payload.aud}`
        );
        if (!payload.sub) {
          throw new Error("JWT payload missing subject claim");
        }
        return {
          token,
          clientId: payload.sub,
          scopes: ["*"] // Full access for all authenticated users
        };
      } catch (error) {
        const errorMessage = error instanceof Error ? error.message : "Unknown error";
        logger.debug(
          `JWT validation failed: ${errorMessage}, trying userinfo fallback...`
        );
      }
    }
    if (this.discoveredEndpoints?.userinfoUrl) {
      try {
        logger.debug("Attempting userinfo endpoint validation...");
        const response = await fetch(this.discoveredEndpoints.userinfoUrl, {
          method: "GET",
          headers: {
            Authorization: `Bearer ${token}`,
            Accept: "application/json"
          }
        });
        if (!response.ok) {
          throw new Error(
            `Userinfo request failed: ${response.status} ${response.statusText}`
          );
        }
        const userinfo = await response.json();
        logger.debug(
          `Token validation successful. User: ${userinfo.sub}, Email: ${userinfo.email}`
        );
        if (!userinfo.sub) {
          throw new Error("Userinfo response missing subject");
        }
        if (request) {
          const supportedResources = this.getSupportedResources(request);
          logger.debug(`Supported resources: ${JSON.stringify(supportedResources)}`);
        }
        return {
          token,
          clientId: userinfo.sub,
          scopes: ["*"] // Full access for all authenticated users
        };
      } catch (error) {
        const errorMessage = error instanceof Error ? error.message : "Unknown error";
        logger.debug(`Userinfo validation failed: ${errorMessage}`);
      }
    }
    logger.debug("All token validation strategies exhausted");
    throw new Error("Invalid access token");
  }

  /**
   * Get client information for the given client ID.
   * This is called by the proxy provider for client validation.
   *
   * @param {string} clientId - Client identifier.
   * @returns {Promise<{client_id: string, redirect_uris: string[]}>}
   */
  async getClient(clientId) {
    return {
      client_id: clientId,
      redirect_uris: [`${this.config.audience}/callback`]
      // Add other client metadata as needed
    };
  }

  /**
   * Create an authentication context from a token (for compatibility with existing middleware).
   * Uses binary authentication - valid token grants full access.
   * Never throws: any failure yields an unauthenticated context.
   *
   * @param {string} authorization - Raw Authorization header value ("" when absent).
   * @param {object} [request] - Incoming request, forwarded to verifyAccessToken().
   * @returns {Promise<{authenticated: boolean, scopes: Set<string>, subject?: string}>}
   */
  async createAuthContext(authorization, request) {
    if (!this.config.enabled) {
      return {
        authenticated: false,
        scopes: /* @__PURE__ */ new Set()
      };
    }
    try {
      // The header carries the credential itself, so its content is not logged.
      logger.debug("Processing authorization header");
      const match = authorization.match(/^Bearer\s+(.+)$/i);
      if (!match) {
        logger.debug("Authorization header does not match Bearer token pattern");
        throw new Error("Invalid authorization header format");
      }
      const token = match[1];
      logger.debug("Extracted bearer token from authorization header");
      const authInfo = await this.verifyAccessToken(token, request);
      logger.debug(`Authentication successful for client: ${authInfo.clientId}`);
      return {
        authenticated: true,
        scopes: /* @__PURE__ */ new Set(["*"]), // Full access for authenticated users
        subject: authInfo.clientId
      };
    } catch (error) {
      const errorMessage = error instanceof Error ? error.message : "Unknown error";
      logger.debug(`Authentication failed: ${errorMessage}`);
      return {
        authenticated: false,
        scopes: /* @__PURE__ */ new Set()
      };
    }
  }
}
108
908
  var PipelineJobStatus = /* @__PURE__ */ ((PipelineJobStatus2) => {
109
909
  PipelineJobStatus2["QUEUED"] = "queued";
110
910
  PipelineJobStatus2["RUNNING"] = "running";
@@ -185,7 +985,7 @@ class ClearCompletedJobsTool {
185
985
  try {
186
986
  const clearedCount = await this.pipeline.clearCompletedJobs();
187
987
  const message = clearedCount > 0 ? `Successfully cleared ${clearedCount} completed job${clearedCount === 1 ? "" : "s"} from the queue.` : "No completed jobs to clear.";
188
- logger.debug(`[ClearCompletedJobsTool] ${message}`);
988
+ logger.debug(message);
189
989
  return {
190
990
  message,
191
991
  success: true,
@@ -193,7 +993,7 @@ class ClearCompletedJobsTool {
193
993
  };
194
994
  } catch (error) {
195
995
  const errorMessage = `Failed to clear completed jobs: ${error instanceof Error ? error.message : String(error)}`;
196
- logger.error(`❌ [ClearCompletedJobsTool] ${errorMessage}`);
996
+ logger.error(`❌ ${errorMessage}`);
197
997
  return {
198
998
  message: errorMessage,
199
999
  success: false,
@@ -328,14 +1128,43 @@ class HtmlLinkExtractorMiddleware {
328
1128
  return;
329
1129
  }
330
1130
  try {
1131
+ let docBase = context.source;
1132
+ try {
1133
+ const baseEl = $("base[href]").first();
1134
+ const rawBase = baseEl.attr("href");
1135
+ if (rawBase && rawBase.trim() !== "") {
1136
+ try {
1137
+ const trimmed = rawBase.trim();
1138
+ const candidate = new URL(trimmed, context.source);
1139
+ const hasScheme = /^[a-zA-Z][a-zA-Z0-9+.-]*:/.test(trimmed);
1140
+ const protocolRelative = trimmed.startsWith("//");
1141
+ const firstSlash = trimmed.indexOf("/");
1142
+ const firstColon = trimmed.indexOf(":");
1143
+ const colonBeforeSlash = firstColon !== -1 && (firstSlash === -1 || firstColon < firstSlash);
1144
+ const suspiciousColon = colonBeforeSlash && !hasScheme && !protocolRelative;
1145
+ if (suspiciousColon || trimmed.startsWith(":")) {
1146
+ logger.debug(
1147
+ `Ignoring suspicious <base href> value (colon misuse): ${rawBase}`
1148
+ );
1149
+ } else {
1150
+ docBase = candidate.href;
1151
+ }
1152
+ } catch {
1153
+ logger.debug(`Ignoring invalid <base href> value: ${rawBase}`);
1154
+ }
1155
+ }
1156
+ } catch {
1157
+ }
331
1158
  const linkElements = $("a[href]");
332
- logger.debug(`Found ${linkElements.length} potential links in ${context.source}`);
1159
+ logger.debug(
1160
+ `Found ${linkElements.length} potential links in ${context.source} (base=${docBase})`
1161
+ );
333
1162
  const extractedLinks = [];
334
1163
  linkElements.each((_index, element) => {
335
1164
  const href = $(element).attr("href");
336
1165
  if (href && href.trim() !== "") {
337
1166
  try {
338
- const urlObj = new URL(href, context.source);
1167
+ const urlObj = new URL(href, docBase);
339
1168
  if (!["http:", "https:", "file:"].includes(urlObj.protocol)) {
340
1169
  logger.debug(`Ignoring link with invalid protocol: ${href}`);
341
1170
  return;
@@ -1110,7 +1939,7 @@ class FetchUrlTool {
1110
1939
  async execute(options) {
1111
1940
  const { url, scrapeMode = ScrapeMode.Auto, headers } = options;
1112
1941
  const canFetchResults = this.fetchers.map((f) => f.canFetch(url));
1113
- const fetcherIndex = canFetchResults.findIndex((result) => result === true);
1942
+ const fetcherIndex = canFetchResults.indexOf(true);
1114
1943
  if (fetcherIndex === -1) {
1115
1944
  throw new ToolError(
1116
1945
  `Invalid URL: ${url}. Must be an HTTP/HTTPS URL or a file:// URL.`,
@@ -1343,14 +2172,13 @@ class RemoveTool {
1343
2172
  this.pipeline = pipeline;
1344
2173
  }
1345
2174
  /**
1346
- * Executes the tool to remove the specified library version documents.
2175
+ * Executes the tool to remove the specified library version completely.
1347
2176
  * Aborts any QUEUED/RUNNING job for the same library+version before deleting.
2177
+ * Removes all documents, the version record, and the library if no other versions exist.
1348
2178
  */
1349
2179
  async execute(args) {
1350
2180
  const { library, version: version2 } = args;
1351
- logger.info(
1352
- `🗑️ Removing library: ${library}${version2 ? `, version: ${version2}` : " (unversioned)"}`
1353
- );
2181
+ logger.info(`🗑️ Removing library: ${library}${version2 ? `@${version2}` : ""}`);
1354
2182
  try {
1355
2183
  const allJobs = await this.pipeline.getJobs();
1356
2184
  const jobs = allJobs.filter(
@@ -1363,12 +2191,12 @@ class RemoveTool {
1363
2191
  await this.pipeline.cancelJob(job.id);
1364
2192
  await this.pipeline.waitForJobCompletion(job.id);
1365
2193
  }
1366
- await this.documentManagementService.removeAllDocuments(library, version2);
1367
- const message = `Successfully removed documents for ${library}${version2 ? `@${version2}` : " (unversioned)"}.`;
2194
+ await this.documentManagementService.removeVersion(library, version2);
2195
+ const message = `Successfully removed ${library}${version2 ? `@${version2}` : ""}.`;
1368
2196
  logger.info(`✅ ${message}`);
1369
2197
  return { message };
1370
2198
  } catch (error) {
1371
- const errorMessage = `Failed to remove documents for ${library}${version2 ? `@${version2}` : " (unversioned)"}: ${error instanceof Error ? error.message : String(error)}`;
2199
+ const errorMessage = `Failed to remove ${library}${version2 ? `@${version2}` : ""}: ${error instanceof Error ? error.message : String(error)}`;
1372
2200
  logger.error(`❌ Error removing library: ${errorMessage}`);
1373
2201
  throw new ToolError(errorMessage, this.constructor.name);
1374
2202
  }
@@ -1517,7 +2345,7 @@ function createError(text) {
1517
2345
  isError: true
1518
2346
  };
1519
2347
  }
1520
- function createMcpServerInstance(tools) {
2348
+ function createMcpServerInstance(tools, readOnly = false) {
1521
2349
  const server = new McpServer(
1522
2350
  {
1523
2351
  name: "docs-mcp-server",
@@ -1531,54 +2359,56 @@ function createMcpServerInstance(tools) {
1531
2359
  }
1532
2360
  }
1533
2361
  );
1534
- server.tool(
1535
- "scrape_docs",
1536
- "Scrape and index documentation from a URL for a library. Use this tool to index a new library or a new version.",
1537
- {
1538
- url: z.string().url().describe("Documentation root URL to scrape."),
1539
- library: z.string().describe("Library name."),
1540
- version: z.string().optional().describe("Library version (optional)."),
1541
- maxPages: z.number().optional().default(DEFAULT_MAX_PAGES).describe(`Maximum number of pages to scrape (default: ${DEFAULT_MAX_PAGES}).`),
1542
- maxDepth: z.number().optional().default(DEFAULT_MAX_DEPTH$1).describe(`Maximum navigation depth (default: ${DEFAULT_MAX_DEPTH$1}).`),
1543
- scope: z.enum(["subpages", "hostname", "domain"]).optional().default("subpages").describe("Crawling boundary: 'subpages', 'hostname', or 'domain'."),
1544
- followRedirects: z.boolean().optional().default(true).describe("Follow HTTP redirects (3xx responses).")
1545
- },
1546
- {
1547
- title: "Scrape New Library Documentation",
1548
- destructiveHint: true,
1549
- // replaces existing docs
1550
- openWorldHint: true
1551
- // requires internet access
1552
- },
1553
- async ({ url, library, version: version2, maxPages, maxDepth, scope, followRedirects }) => {
1554
- try {
1555
- const result = await tools.scrape.execute({
1556
- url,
1557
- library,
1558
- version: version2,
1559
- waitForCompletion: false,
1560
- // Don't wait for completion
1561
- // onProgress: undefined, // Explicitly undefined or omitted
1562
- options: {
1563
- maxPages,
1564
- maxDepth,
1565
- scope,
1566
- followRedirects
2362
+ if (!readOnly) {
2363
+ server.tool(
2364
+ "scrape_docs",
2365
+ "Scrape and index documentation from a URL for a library. Use this tool to index a new library or a new version.",
2366
+ {
2367
+ url: z.string().url().describe("Documentation root URL to scrape."),
2368
+ library: z.string().describe("Library name."),
2369
+ version: z.string().optional().describe("Library version (optional)."),
2370
+ maxPages: z.number().optional().default(DEFAULT_MAX_PAGES).describe(`Maximum number of pages to scrape (default: ${DEFAULT_MAX_PAGES}).`),
2371
+ maxDepth: z.number().optional().default(DEFAULT_MAX_DEPTH$1).describe(`Maximum navigation depth (default: ${DEFAULT_MAX_DEPTH$1}).`),
2372
+ scope: z.enum(["subpages", "hostname", "domain"]).optional().default("subpages").describe("Crawling boundary: 'subpages', 'hostname', or 'domain'."),
2373
+ followRedirects: z.boolean().optional().default(true).describe("Follow HTTP redirects (3xx responses).")
2374
+ },
2375
+ {
2376
+ title: "Scrape New Library Documentation",
2377
+ destructiveHint: true,
2378
+ // replaces existing docs
2379
+ openWorldHint: true
2380
+ // requires internet access
2381
+ },
2382
+ async ({ url, library, version: version2, maxPages, maxDepth, scope, followRedirects }) => {
2383
+ try {
2384
+ const result = await tools.scrape.execute({
2385
+ url,
2386
+ library,
2387
+ version: version2,
2388
+ waitForCompletion: false,
2389
+ // Don't wait for completion
2390
+ // onProgress: undefined, // Explicitly undefined or omitted
2391
+ options: {
2392
+ maxPages,
2393
+ maxDepth,
2394
+ scope,
2395
+ followRedirects
2396
+ }
2397
+ });
2398
+ if ("jobId" in result) {
2399
+ return createResponse(`🚀 Scraping job started with ID: ${result.jobId}.`);
1567
2400
  }
1568
- });
1569
- if ("jobId" in result) {
1570
- return createResponse(`🚀 Scraping job started with ID: ${result.jobId}.`);
2401
+ return createResponse(
2402
+ `Scraping finished immediately (unexpectedly) with ${result.pagesScraped} pages.`
2403
+ );
2404
+ } catch (error) {
2405
+ return createError(
2406
+ `Failed to scrape documentation: ${error instanceof Error ? error.message : String(error)}`
2407
+ );
1571
2408
  }
1572
- return createResponse(
1573
- `Scraping finished immediately (unexpectedly) with ${result.pagesScraped} pages.`
1574
- );
1575
- } catch (error) {
1576
- return createError(
1577
- `Failed to scrape documentation: ${error instanceof Error ? error.message : String(error)}`
1578
- );
1579
2409
  }
1580
- }
1581
- );
2410
+ );
2411
+ }
1582
2412
  server.tool(
1583
2413
  "search_docs",
1584
2414
  'Search up-to-date documentation for a library or package. Examples:\n\n- {library: "react", query: "hooks lifecycle"} -> matches latest version of React\n- {library: "react", version: "18.0.0", query: "hooks lifecycle"} -> matches React 18.0.0 or earlier\n- {library: "typescript", version: "5.x", query: "ReturnType example"} -> any TypeScript 5.x.x version\n- {library: "typescript", version: "5.2.x", query: "ReturnType example"} -> any TypeScript 5.2.x version',
@@ -1699,24 +2529,25 @@ ${result.libraries.map((lib) => `- ${lib.name}`).join("\n")}`
1699
2529
  }
1700
2530
  }
1701
2531
  );
1702
- server.tool(
1703
- "list_jobs",
1704
- "List all indexing jobs. Optionally filter by status.",
1705
- {
1706
- status: z.enum(["queued", "running", "completed", "failed", "cancelling", "cancelled"]).optional().describe("Filter jobs by status (optional).")
1707
- },
1708
- {
1709
- title: "List Indexing Jobs",
1710
- readOnlyHint: true,
1711
- destructiveHint: false
1712
- },
1713
- async ({ status }) => {
1714
- try {
1715
- const result = await tools.listJobs.execute({
1716
- status
1717
- });
1718
- const formattedJobs = result.jobs.map(
1719
- (job) => `- ID: ${job.id}
2532
+ if (!readOnly) {
2533
+ server.tool(
2534
+ "list_jobs",
2535
+ "List all indexing jobs. Optionally filter by status.",
2536
+ {
2537
+ status: z.enum(["queued", "running", "completed", "failed", "cancelling", "cancelled"]).optional().describe("Filter jobs by status (optional).")
2538
+ },
2539
+ {
2540
+ title: "List Indexing Jobs",
2541
+ readOnlyHint: true,
2542
+ destructiveHint: false
2543
+ },
2544
+ async ({ status }) => {
2545
+ try {
2546
+ const result = await tools.listJobs.execute({
2547
+ status
2548
+ });
2549
+ const formattedJobs = result.jobs.map(
2550
+ (job) => `- ID: ${job.id}
1720
2551
  Status: ${job.status}
1721
2552
  Library: ${job.library}
1722
2553
  Version: ${job.version}
@@ -1724,100 +2555,101 @@ ${result.libraries.map((lib) => `- ${lib.name}`).join("\n")}`
1724
2555
  Started: ${job.startedAt}` : ""}${job.finishedAt ? `
1725
2556
  Finished: ${job.finishedAt}` : ""}${job.error ? `
1726
2557
  Error: ${job.error}` : ""}`
1727
- ).join("\n\n");
1728
- return createResponse(
1729
- result.jobs.length > 0 ? `Current Jobs:
2558
+ ).join("\n\n");
2559
+ return createResponse(
2560
+ result.jobs.length > 0 ? `Current Jobs:
1730
2561
 
1731
2562
  ${formattedJobs}` : "No jobs found."
1732
- );
1733
- } catch (error) {
1734
- return createError(
1735
- `Failed to list jobs: ${error instanceof Error ? error.message : String(error)}`
1736
- );
1737
- }
1738
- }
1739
- );
1740
- server.tool(
1741
- "get_job_info",
1742
- "Get details for a specific indexing job. Use the 'list_jobs' tool to find the job ID.",
1743
- {
1744
- jobId: z.string().uuid().describe("Job ID to query.")
1745
- },
1746
- {
1747
- title: "Get Indexing Job Info",
1748
- readOnlyHint: true,
1749
- destructiveHint: false
1750
- },
1751
- async ({ jobId }) => {
1752
- try {
1753
- const result = await tools.getJobInfo.execute({ jobId });
1754
- if (!result.job) {
1755
- return createError(`Job with ID ${jobId} not found.`);
2563
+ );
2564
+ } catch (error) {
2565
+ return createError(
2566
+ `Failed to list jobs: ${error instanceof Error ? error.message : String(error)}`
2567
+ );
1756
2568
  }
1757
- const job = result.job;
1758
- const formattedJob = `- ID: ${job.id}
2569
+ }
2570
+ );
2571
+ server.tool(
2572
+ "get_job_info",
2573
+ "Get details for a specific indexing job. Use the 'list_jobs' tool to find the job ID.",
2574
+ {
2575
+ jobId: z.string().uuid().describe("Job ID to query.")
2576
+ },
2577
+ {
2578
+ title: "Get Indexing Job Info",
2579
+ readOnlyHint: true,
2580
+ destructiveHint: false
2581
+ },
2582
+ async ({ jobId }) => {
2583
+ try {
2584
+ const result = await tools.getJobInfo.execute({ jobId });
2585
+ if (!result.job) {
2586
+ return createError(`Job with ID ${jobId} not found.`);
2587
+ }
2588
+ const job = result.job;
2589
+ const formattedJob = `- ID: ${job.id}
1759
2590
  Status: ${job.status}
1760
2591
  Library: ${job.library}@${job.version}
1761
2592
  Created: ${job.createdAt}${job.startedAt ? `
1762
2593
  Started: ${job.startedAt}` : ""}${job.finishedAt ? `
1763
2594
  Finished: ${job.finishedAt}` : ""}${job.error ? `
1764
2595
  Error: ${job.error}` : ""}`;
1765
- return createResponse(`Job Info:
2596
+ return createResponse(`Job Info:
1766
2597
 
1767
2598
  ${formattedJob}`);
1768
- } catch (error) {
1769
- return createError(
1770
- `Failed to get job info for ${jobId}: ${error instanceof Error ? error.message : String(error)}`
1771
- );
2599
+ } catch (error) {
2600
+ return createError(
2601
+ `Failed to get job info for ${jobId}: ${error instanceof Error ? error.message : String(error)}`
2602
+ );
2603
+ }
1772
2604
  }
1773
- }
1774
- );
1775
- server.tool(
1776
- "cancel_job",
1777
- "Cancel a queued or running indexing job. Use the 'list_jobs' tool to find the job ID.",
1778
- {
1779
- jobId: z.string().uuid().describe("Job ID to cancel.")
1780
- },
1781
- {
1782
- title: "Cancel Indexing Job",
1783
- destructiveHint: true
1784
- },
1785
- async ({ jobId }) => {
1786
- try {
1787
- const result = await tools.cancelJob.execute({ jobId });
1788
- if (result.success) {
1789
- return createResponse(result.message);
2605
+ );
2606
+ server.tool(
2607
+ "cancel_job",
2608
+ "Cancel a queued or running indexing job. Use the 'list_jobs' tool to find the job ID.",
2609
+ {
2610
+ jobId: z.string().uuid().describe("Job ID to cancel.")
2611
+ },
2612
+ {
2613
+ title: "Cancel Indexing Job",
2614
+ destructiveHint: true
2615
+ },
2616
+ async ({ jobId }) => {
2617
+ try {
2618
+ const result = await tools.cancelJob.execute({ jobId });
2619
+ if (result.success) {
2620
+ return createResponse(result.message);
2621
+ }
2622
+ return createError(result.message);
2623
+ } catch (error) {
2624
+ return createError(
2625
+ `Failed to cancel job ${jobId}: ${error instanceof Error ? error.message : String(error)}`
2626
+ );
1790
2627
  }
1791
- return createError(result.message);
1792
- } catch (error) {
1793
- return createError(
1794
- `Failed to cancel job ${jobId}: ${error instanceof Error ? error.message : String(error)}`
1795
- );
1796
2628
  }
1797
- }
1798
- );
1799
- server.tool(
1800
- "remove_docs",
1801
- "Remove indexed documentation for a library version. Use only if explicitly instructed.",
1802
- {
1803
- library: z.string().describe("Library name."),
1804
- version: z.string().optional().describe("Library version (optional, removes unversioned if omitted).")
1805
- },
1806
- {
1807
- title: "Remove Library Documentation",
1808
- destructiveHint: true
1809
- },
1810
- async ({ library, version: version2 }) => {
1811
- try {
1812
- const result = await tools.remove.execute({ library, version: version2 });
1813
- return createResponse(result.message);
1814
- } catch (error) {
1815
- return createError(
1816
- `Failed to remove documents: ${error instanceof Error ? error.message : String(error)}`
1817
- );
2629
+ );
2630
+ server.tool(
2631
+ "remove_docs",
2632
+ "Remove indexed documentation for a library version. Use only if explicitly instructed.",
2633
+ {
2634
+ library: z.string().describe("Library name."),
2635
+ version: z.string().optional().describe("Library version (optional, removes unversioned if omitted).")
2636
+ },
2637
+ {
2638
+ title: "Remove Library Documentation",
2639
+ destructiveHint: true
2640
+ },
2641
+ async ({ library, version: version2 }) => {
2642
+ try {
2643
+ const result = await tools.remove.execute({ library, version: version2 });
2644
+ return createResponse(result.message);
2645
+ } catch (error) {
2646
+ return createError(
2647
+ `Failed to remove documents: ${error instanceof Error ? error.message : String(error)}`
2648
+ );
2649
+ }
1818
2650
  }
1819
- }
1820
- );
2651
+ );
2652
+ }
1821
2653
  server.tool(
1822
2654
  "fetch_url",
1823
2655
  "Fetch a single URL and convert its content to Markdown. Use this tool to read the content of any web page.",
@@ -1881,74 +2713,76 @@ ${formattedJob}`);
1881
2713
  };
1882
2714
  }
1883
2715
  );
1884
- server.resource(
1885
- "jobs",
1886
- "docs://jobs",
1887
- {
1888
- description: "List indexing jobs, optionally filtering by status.",
1889
- mimeType: "application/json"
1890
- },
1891
- async (uri) => {
1892
- const statusParam = uri.searchParams.get("status");
1893
- let statusFilter;
1894
- if (statusParam) {
1895
- const validation = z.nativeEnum(PipelineJobStatus).safeParse(statusParam);
1896
- if (validation.success) {
1897
- statusFilter = validation.data;
1898
- } else {
1899
- logger.warn(`⚠️ Invalid status parameter received: ${statusParam}`);
2716
+ if (!readOnly) {
2717
+ server.resource(
2718
+ "jobs",
2719
+ "docs://jobs",
2720
+ {
2721
+ description: "List indexing jobs, optionally filtering by status.",
2722
+ mimeType: "application/json"
2723
+ },
2724
+ async (uri) => {
2725
+ const statusParam = uri.searchParams.get("status");
2726
+ let statusFilter;
2727
+ if (statusParam) {
2728
+ const validation = z.nativeEnum(PipelineJobStatus).safeParse(statusParam);
2729
+ if (validation.success) {
2730
+ statusFilter = validation.data;
2731
+ } else {
2732
+ logger.warn(`⚠️ Invalid status parameter received: ${statusParam}`);
2733
+ }
1900
2734
  }
1901
- }
1902
- const result = await tools.listJobs.execute({ status: statusFilter });
1903
- return {
1904
- contents: result.jobs.map((job) => ({
1905
- uri: new URL(job.id, uri).href,
1906
- mimeType: "application/json",
1907
- text: JSON.stringify({
1908
- id: job.id,
1909
- library: job.library,
1910
- version: job.version,
1911
- status: job.status,
1912
- error: job.error || void 0
1913
- })
1914
- }))
1915
- };
1916
- }
1917
- );
1918
- server.resource(
1919
- "job",
1920
- // A distinct name for this specific resource type
1921
- new ResourceTemplate("docs://jobs/{jobId}", { list: void 0 }),
1922
- {
1923
- description: "Get details for a specific indexing job by ID.",
1924
- mimeType: "application/json"
1925
- },
1926
- async (uri, { jobId }) => {
1927
- if (typeof jobId !== "string" || jobId.length === 0) {
1928
- logger.warn(`⚠️ Invalid jobId received in URI: ${jobId}`);
1929
- return { contents: [] };
1930
- }
1931
- const result = await tools.getJobInfo.execute({ jobId });
1932
- if (!result.job) {
1933
- return { contents: [] };
1934
- }
1935
- return {
1936
- contents: [
1937
- {
1938
- uri: uri.href,
2735
+ const result = await tools.listJobs.execute({ status: statusFilter });
2736
+ return {
2737
+ contents: result.jobs.map((job) => ({
2738
+ uri: new URL(job.id, uri).href,
1939
2739
  mimeType: "application/json",
1940
2740
  text: JSON.stringify({
1941
- id: result.job.id,
1942
- library: result.job.library,
1943
- version: result.job.version,
1944
- status: result.job.status,
1945
- error: result.job.error || void 0
2741
+ id: job.id,
2742
+ library: job.library,
2743
+ version: job.version,
2744
+ status: job.status,
2745
+ error: job.error || void 0
1946
2746
  })
1947
- }
1948
- ]
1949
- };
1950
- }
1951
- );
2747
+ }))
2748
+ };
2749
+ }
2750
+ );
2751
+ server.resource(
2752
+ "job",
2753
+ // A distinct name for this specific resource type
2754
+ new ResourceTemplate("docs://jobs/{jobId}", { list: void 0 }),
2755
+ {
2756
+ description: "Get details for a specific indexing job by ID.",
2757
+ mimeType: "application/json"
2758
+ },
2759
+ async (uri, { jobId }) => {
2760
+ if (typeof jobId !== "string" || jobId.length === 0) {
2761
+ logger.warn(`⚠️ Invalid jobId received in URI: ${jobId}`);
2762
+ return { contents: [] };
2763
+ }
2764
+ const result = await tools.getJobInfo.execute({ jobId });
2765
+ if (!result.job) {
2766
+ return { contents: [] };
2767
+ }
2768
+ return {
2769
+ contents: [
2770
+ {
2771
+ uri: uri.href,
2772
+ mimeType: "application/json",
2773
+ text: JSON.stringify({
2774
+ id: result.job.id,
2775
+ library: result.job.library,
2776
+ version: result.job.version,
2777
+ status: result.job.status,
2778
+ error: result.job.error || void 0
2779
+ })
2780
+ }
2781
+ ]
2782
+ };
2783
+ }
2784
+ );
2785
+ }
1952
2786
  return server;
1953
2787
  }
1954
2788
  class FileFetcher {
@@ -1963,7 +2797,7 @@ class FileFetcher {
1963
2797
  const rawPath = source.replace("file://", "");
1964
2798
  const filePath = decodeURIComponent(rawPath);
1965
2799
  try {
1966
- const content = await fs.readFile(filePath);
2800
+ const content = await fs$1.readFile(filePath);
1967
2801
  const ext = path.extname(filePath).toLowerCase();
1968
2802
  const mimeType = mime.lookup(ext) || "application/octet-stream";
1969
2803
  return {
@@ -2054,9 +2888,49 @@ class HttpFetcher {
2054
2888
  return new Promise((resolve) => setTimeout(resolve, ms));
2055
2889
  }
2056
2890
  async fetch(source, options) {
2891
+ const startTime = performance.now();
2057
2892
  const maxRetries = options?.maxRetries ?? FETCHER_MAX_RETRIES;
2058
2893
  const baseDelay = options?.retryDelay ?? FETCHER_BASE_DELAY;
2059
2894
  const followRedirects = options?.followRedirects ?? true;
2895
+ try {
2896
+ const result = await this.performFetch(
2897
+ source,
2898
+ options,
2899
+ maxRetries,
2900
+ baseDelay,
2901
+ followRedirects
2902
+ );
2903
+ const duration = performance.now() - startTime;
2904
+ analytics.track("http_request_completed", {
2905
+ success: true,
2906
+ hostname: extractHostname(source),
2907
+ protocol: extractProtocol(source),
2908
+ duration_ms: Math.round(duration),
2909
+ content_size_bytes: result.content.length,
2910
+ mime_type: result.mimeType,
2911
+ has_encoding: !!result.encoding,
2912
+ follow_redirects: followRedirects,
2913
+ had_redirects: result.source !== source
2914
+ });
2915
+ return result;
2916
+ } catch (error) {
2917
+ const duration = performance.now() - startTime;
2918
+ const axiosError = error;
2919
+ const status = axiosError.response?.status;
2920
+ analytics.track("http_request_completed", {
2921
+ success: false,
2922
+ hostname: extractHostname(source),
2923
+ protocol: extractProtocol(source),
2924
+ duration_ms: Math.round(duration),
2925
+ status_code: status,
2926
+ error_type: error instanceof CancellationError ? "cancellation" : error instanceof RedirectError ? "redirect" : error instanceof ScraperError ? "scraper" : "unknown",
2927
+ error_code: axiosError.code,
2928
+ follow_redirects: followRedirects
2929
+ });
2930
+ throw error;
2931
+ }
2932
+ }
2933
+ async performFetch(source, options, maxRetries = FETCHER_MAX_RETRIES, baseDelay = FETCHER_BASE_DELAY, followRedirects = true) {
2060
2934
  for (let attempt = 0; attempt <= maxRetries; attempt++) {
2061
2935
  try {
2062
2936
  const fingerprint = this.fingerprintGenerator.generateHeaders();
@@ -2094,12 +2968,18 @@ class HttpFetcher {
2094
2968
  } else {
2095
2969
  content = Buffer.from(response.data);
2096
2970
  }
2971
+ const finalUrl = (
2972
+ // Node follow-redirects style
2973
+ response.request?.res?.responseUrl || // Some adapters may expose directly
2974
+ response.request?.responseUrl || // Fallback to axios recorded config URL
2975
+ response.config?.url || source
2976
+ );
2097
2977
  return {
2098
2978
  content,
2099
2979
  mimeType,
2100
2980
  charset,
2101
2981
  encoding: contentEncoding,
2102
- source
2982
+ source: finalUrl
2103
2983
  };
2104
2984
  } catch (error) {
2105
2985
  const axiosError = error;
@@ -2150,20 +3030,35 @@ async function initializeTools(docService, pipeline) {
2150
3030
  };
2151
3031
  return tools;
2152
3032
  }
2153
- async function registerMcpService(server, docService, pipeline) {
3033
+ async function registerMcpService(server, docService, pipeline, readOnly = false, authManager) {
2154
3034
  const mcpTools = await initializeTools(docService, pipeline);
2155
- const mcpServer = createMcpServerInstance(mcpTools);
3035
+ const mcpServer = createMcpServerInstance(mcpTools, readOnly);
3036
+ const authMiddleware = authManager ? createAuthMiddleware(authManager) : null;
2156
3037
  const sseTransports = {};
2157
3038
  server.route({
2158
3039
  method: "GET",
2159
3040
  url: "/sse",
3041
+ preHandler: authMiddleware ? [authMiddleware] : void 0,
2160
3042
  handler: async (_request, reply) => {
2161
3043
  try {
2162
3044
  const transport = new SSEServerTransport("/messages", reply.raw);
2163
3045
  sseTransports[transport.sessionId] = transport;
3046
+ if (analytics.isEnabled()) {
3047
+ const session = createMcpSession({
3048
+ protocol: "http",
3049
+ transport: "sse",
3050
+ authEnabled: !!authManager,
3051
+ readOnly,
3052
+ servicesEnabled: ["mcp"]
3053
+ });
3054
+ analytics.startSession(session);
3055
+ }
2164
3056
  reply.raw.on("close", () => {
2165
3057
  delete sseTransports[transport.sessionId];
2166
3058
  transport.close();
3059
+ if (analytics.isEnabled()) {
3060
+ analytics.endSession();
3061
+ }
2167
3062
  });
2168
3063
  await mcpServer.connect(transport);
2169
3064
  } catch (error) {
@@ -2198,16 +3093,30 @@ async function registerMcpService(server, docService, pipeline) {
2198
3093
  server.route({
2199
3094
  method: "POST",
2200
3095
  url: "/mcp",
3096
+ preHandler: authMiddleware ? [authMiddleware] : void 0,
2201
3097
  handler: async (request, reply) => {
2202
3098
  try {
2203
- const requestServer = createMcpServerInstance(mcpTools);
3099
+ const requestServer = createMcpServerInstance(mcpTools, readOnly);
2204
3100
  const requestTransport = new StreamableHTTPServerTransport({
2205
3101
  sessionIdGenerator: void 0
2206
3102
  });
3103
+ if (analytics.isEnabled()) {
3104
+ const session = createMcpSession({
3105
+ protocol: "http",
3106
+ transport: "streamable",
3107
+ authEnabled: !!authManager,
3108
+ readOnly,
3109
+ servicesEnabled: ["mcp"]
3110
+ });
3111
+ analytics.startSession(session);
3112
+ }
2207
3113
  reply.raw.on("close", () => {
2208
3114
  logger.debug("Streamable HTTP request closed");
2209
3115
  requestTransport.close();
2210
3116
  requestServer.close();
3117
+ if (analytics.isEnabled()) {
3118
+ analytics.endSession();
3119
+ }
2211
3120
  });
2212
3121
  await requestServer.connect(requestTransport);
2213
3122
  await requestTransport.handleRequest(request.raw, reply.raw, request.body);
@@ -3031,410 +3940,475 @@ const Tooltip = ({ text, position = "top" }) => {
3031
3940
  }
3032
3941
  );
3033
3942
  };
3034
- const ScrapeFormContent = () => /* @__PURE__ */ jsxs("div", { class: "mt-4 p-4 bg-white dark:bg-gray-800 rounded-lg shadow border border-gray-300 dark:border-gray-600", children: [
3035
- /* @__PURE__ */ jsx("h3", { class: "text-xl font-semibold text-gray-900 dark:text-white mb-2", children: "Queue New Scrape Job" }),
3036
- /* @__PURE__ */ jsxs(
3037
- "form",
3038
- {
3039
- "hx-post": "/web/jobs/scrape",
3040
- "hx-target": "#job-response",
3041
- "hx-swap": "innerHTML",
3042
- class: "space-y-2",
3043
- "x-data": "{\n url: '',\n hasPath: false,\n headers: [],\n checkUrlPath() {\n try {\n const url = new URL(this.url);\n this.hasPath = url.pathname !== '/' && url.pathname !== '';\n } catch (e) {\n this.hasPath = false;\n }\n }\n }",
3044
- children: [
3045
- /* @__PURE__ */ jsxs("div", { children: [
3046
- /* @__PURE__ */ jsxs("div", { class: "flex items-center", children: [
3943
+ const ScrapeFormContent = ({ defaultExcludePatterns }) => {
3944
+ const defaultExcludePatternsText = defaultExcludePatterns?.join("\n") || "";
3945
+ return /* @__PURE__ */ jsxs("div", { class: "mt-4 p-4 bg-white dark:bg-gray-800 rounded-lg shadow border border-gray-300 dark:border-gray-600", children: [
3946
+ /* @__PURE__ */ jsx("h3", { class: "text-xl font-semibold text-gray-900 dark:text-white mb-2", children: "Queue New Scrape Job" }),
3947
+ /* @__PURE__ */ jsxs(
3948
+ "form",
3949
+ {
3950
+ "hx-post": "/web/jobs/scrape",
3951
+ "hx-target": "#job-response",
3952
+ "hx-swap": "innerHTML",
3953
+ class: "space-y-2",
3954
+ "x-data": "{\n url: '',\n hasPath: false,\n headers: [],\n checkUrlPath() {\n try {\n const url = new URL(this.url);\n this.hasPath = url.pathname !== '/' && url.pathname !== '';\n } catch (e) {\n this.hasPath = false;\n }\n }\n }",
3955
+ children: [
3956
+ /* @__PURE__ */ jsxs("div", { children: [
3957
+ /* @__PURE__ */ jsxs("div", { class: "flex items-center", children: [
3958
+ /* @__PURE__ */ jsx(
3959
+ "label",
3960
+ {
3961
+ for: "url",
3962
+ class: "block text-sm font-medium text-gray-700 dark:text-gray-300",
3963
+ children: "URL"
3964
+ }
3965
+ ),
3966
+ /* @__PURE__ */ jsx(
3967
+ Tooltip,
3968
+ {
3969
+ text: /* @__PURE__ */ jsxs("div", { children: [
3970
+ /* @__PURE__ */ jsx("p", { children: "Enter the URL of the documentation you want to scrape." }),
3971
+ /* @__PURE__ */ jsxs("p", { class: "mt-2", children: [
3972
+ "For local files/folders, you must use the ",
3973
+ /* @__PURE__ */ jsx("code", { children: "file://" }),
3974
+ " ",
3975
+ "prefix and ensure the path is accessible to the server."
3976
+ ] }),
3977
+ /* @__PURE__ */ jsxs("p", { class: "mt-2", children: [
3978
+ "If running in Docker, ",
3979
+ /* @__PURE__ */ jsx("b", { children: "mount the folder" }),
3980
+ " (see README for details)."
3981
+ ] })
3982
+ ] })
3983
+ }
3984
+ )
3985
+ ] }),
3047
3986
  /* @__PURE__ */ jsx(
3048
- "label",
3987
+ "input",
3049
3988
  {
3050
- for: "url",
3051
- class: "block text-sm font-medium text-gray-700 dark:text-gray-300",
3052
- children: "URL"
3989
+ type: "url",
3990
+ name: "url",
3991
+ id: "url",
3992
+ required: true,
3993
+ "x-model": "url",
3994
+ "x-on:input": "checkUrlPath",
3995
+ "x-on:paste": "$nextTick(() => checkUrlPath())",
3996
+ class: "mt-0.5 block w-full px-2 py-1 border border-gray-300 dark:border-gray-600 rounded-md shadow-sm focus:outline-none focus:ring-indigo-500 focus:border-indigo-500 sm:text-sm bg-white dark:bg-gray-700 text-gray-900 dark:text-white"
3053
3997
  }
3054
3998
  ),
3055
3999
  /* @__PURE__ */ jsx(
3056
- Tooltip,
4000
+ "div",
3057
4001
  {
3058
- text: /* @__PURE__ */ jsxs("div", { children: [
3059
- /* @__PURE__ */ jsx("p", { children: "Enter the URL of the documentation you want to scrape." }),
3060
- /* @__PURE__ */ jsxs("p", { class: "mt-2", children: [
3061
- "For local files/folders, you must use the ",
3062
- /* @__PURE__ */ jsx("code", { children: "file://" }),
3063
- " ",
3064
- "prefix and ensure the path is accessible to the server."
3065
- ] }),
3066
- /* @__PURE__ */ jsxs("p", { class: "mt-2", children: [
3067
- "If running in Docker, ",
3068
- /* @__PURE__ */ jsx("b", { children: "mount the folder" }),
3069
- " (see README for details)."
3070
- ] })
3071
- ] })
4002
+ "x-show": "hasPath && !(url.startsWith('file://'))",
4003
+ "x-cloak": true,
4004
+ "x-transition:enter": "transition ease-out duration-300",
4005
+ "x-transition:enter-start": "opacity-0 transform -translate-y-2",
4006
+ "x-transition:enter-end": "opacity-100 transform translate-y-0",
4007
+ class: "mt-2",
4008
+ children: /* @__PURE__ */ jsx(
4009
+ Alert,
4010
+ {
4011
+ type: "info",
4012
+ message: "By default, only subpages under the given URL will be scraped. To scrape the whole website, adjust the 'Scope' option in Advanced Options."
4013
+ }
4014
+ )
3072
4015
  }
3073
4016
  )
3074
4017
  ] }),
3075
- /* @__PURE__ */ jsx(
3076
- "input",
3077
- {
3078
- type: "url",
3079
- name: "url",
3080
- id: "url",
3081
- required: true,
3082
- "x-model": "url",
3083
- "x-on:input": "checkUrlPath",
3084
- "x-on:paste": "$nextTick(() => checkUrlPath())",
3085
- class: "mt-0.5 block w-full px-2 py-1 border border-gray-300 dark:border-gray-600 rounded-md shadow-sm focus:outline-none focus:ring-indigo-500 focus:border-indigo-500 sm:text-sm bg-white dark:bg-gray-700 text-gray-900 dark:text-white"
3086
- }
3087
- ),
3088
- /* @__PURE__ */ jsx(
3089
- "div",
3090
- {
3091
- "x-show": "hasPath && !(url.startsWith('file://'))",
3092
- "x-cloak": true,
3093
- "x-transition:enter": "transition ease-out duration-300",
3094
- "x-transition:enter-start": "opacity-0 transform -translate-y-2",
3095
- "x-transition:enter-end": "opacity-100 transform translate-y-0",
3096
- class: "mt-2",
3097
- children: /* @__PURE__ */ jsx(
3098
- Alert,
4018
+ /* @__PURE__ */ jsxs("div", { children: [
4019
+ /* @__PURE__ */ jsxs("div", { class: "flex items-center", children: [
4020
+ /* @__PURE__ */ jsx(
4021
+ "label",
3099
4022
  {
3100
- type: "info",
3101
- message: "By default, only subpages under the given URL will be scraped. To scrape the whole website, adjust the 'Scope' option in Advanced Options."
4023
+ for: "library",
4024
+ class: "block text-sm font-medium text-gray-700 dark:text-gray-300",
4025
+ children: "Library Name"
3102
4026
  }
3103
- )
3104
- }
3105
- )
3106
- ] }),
3107
- /* @__PURE__ */ jsxs("div", { children: [
3108
- /* @__PURE__ */ jsxs("div", { class: "flex items-center", children: [
4027
+ ),
4028
+ /* @__PURE__ */ jsx(Tooltip, { text: "The name of the library you're documenting. This will be used when searching." })
4029
+ ] }),
3109
4030
  /* @__PURE__ */ jsx(
3110
- "label",
4031
+ "input",
3111
4032
  {
3112
- for: "library",
3113
- class: "block text-sm font-medium text-gray-700 dark:text-gray-300",
3114
- children: "Library Name"
4033
+ type: "text",
4034
+ name: "library",
4035
+ id: "library",
4036
+ required: true,
4037
+ class: "mt-0.5 block w-full px-2 py-1 border border-gray-300 dark:border-gray-600 rounded-md shadow-sm focus:outline-none focus:ring-indigo-500 focus:border-indigo-500 sm:text-sm bg-white dark:bg-gray-700 text-gray-900 dark:text-white"
3115
4038
  }
3116
- ),
3117
- /* @__PURE__ */ jsx(Tooltip, { text: "The name of the library you're documenting. This will be used when searching." })
4039
+ )
3118
4040
  ] }),
3119
- /* @__PURE__ */ jsx(
3120
- "input",
3121
- {
3122
- type: "text",
3123
- name: "library",
3124
- id: "library",
3125
- required: true,
3126
- class: "mt-0.5 block w-full px-2 py-1 border border-gray-300 dark:border-gray-600 rounded-md shadow-sm focus:outline-none focus:ring-indigo-500 focus:border-indigo-500 sm:text-sm bg-white dark:bg-gray-700 text-gray-900 dark:text-white"
3127
- }
3128
- )
3129
- ] }),
3130
- /* @__PURE__ */ jsxs("div", { children: [
3131
- /* @__PURE__ */ jsxs("div", { class: "flex items-center", children: [
4041
+ /* @__PURE__ */ jsxs("div", { children: [
4042
+ /* @__PURE__ */ jsxs("div", { class: "flex items-center", children: [
4043
+ /* @__PURE__ */ jsx(
4044
+ "label",
4045
+ {
4046
+ for: "version",
4047
+ class: "block text-sm font-medium text-gray-700 dark:text-gray-300",
4048
+ children: "Version (optional)"
4049
+ }
4050
+ ),
4051
+ /* @__PURE__ */ jsx(Tooltip, { text: "Specify the version of the library documentation you're indexing. This allows for version-specific searches." })
4052
+ ] }),
3132
4053
  /* @__PURE__ */ jsx(
3133
- "label",
4054
+ "input",
3134
4055
  {
3135
- for: "version",
3136
- class: "block text-sm font-medium text-gray-700 dark:text-gray-300",
3137
- children: "Version (optional)"
4056
+ type: "text",
4057
+ name: "version",
4058
+ id: "version",
4059
+ class: "mt-0.5 block w-full max-w-sm px-2 py-1 border border-gray-300 dark:border-gray-600 rounded-md shadow-sm focus:outline-none focus:ring-indigo-500 focus:border-indigo-500 sm:text-sm bg-white dark:bg-gray-700 text-gray-900 dark:text-white"
3138
4060
  }
3139
- ),
3140
- /* @__PURE__ */ jsx(Tooltip, { text: "Specify the version of the library documentation you're indexing. This allows for version-specific searches." })
4061
+ )
3141
4062
  ] }),
3142
- /* @__PURE__ */ jsx(
3143
- "input",
3144
- {
3145
- type: "text",
3146
- name: "version",
3147
- id: "version",
3148
- class: "mt-0.5 block w-full max-w-sm px-2 py-1 border border-gray-300 dark:border-gray-600 rounded-md shadow-sm focus:outline-none focus:ring-indigo-500 focus:border-indigo-500 sm:text-sm bg-white dark:bg-gray-700 text-gray-900 dark:text-white"
3149
- }
3150
- )
3151
- ] }),
3152
- /* @__PURE__ */ jsxs("details", { class: "bg-gray-50 dark:bg-gray-900 p-2 rounded-md", children: [
3153
- /* @__PURE__ */ jsx("summary", { class: "cursor-pointer text-sm font-medium text-gray-600 dark:text-gray-400", children: "Advanced Options" }),
3154
- /* @__PURE__ */ jsxs("div", { class: "mt-2 space-y-2", "x-data": "{ headers: [] }", children: [
3155
- /* @__PURE__ */ jsxs("div", { children: [
3156
- /* @__PURE__ */ jsxs("div", { class: "flex items-center", children: [
4063
+ /* @__PURE__ */ jsxs("details", { class: "bg-gray-50 dark:bg-gray-900 p-2 rounded-md", children: [
4064
+ /* @__PURE__ */ jsx("summary", { class: "cursor-pointer text-sm font-medium text-gray-600 dark:text-gray-400", children: "Advanced Options" }),
4065
+ /* @__PURE__ */ jsxs("div", { class: "mt-2 space-y-2", "x-data": "{ headers: [] }", children: [
4066
+ /* @__PURE__ */ jsxs("div", { children: [
4067
+ /* @__PURE__ */ jsxs("div", { class: "flex items-center", children: [
4068
+ /* @__PURE__ */ jsx(
4069
+ "label",
4070
+ {
4071
+ for: "maxPages",
4072
+ class: "block text-sm font-medium text-gray-700 dark:text-gray-300",
4073
+ children: "Max Pages"
4074
+ }
4075
+ ),
4076
+ /* @__PURE__ */ jsx(Tooltip, { text: "The maximum number of pages to scrape. Default is 1000. Setting this too high may result in longer processing times." })
4077
+ ] }),
3157
4078
  /* @__PURE__ */ jsx(
3158
- "label",
4079
+ "input",
3159
4080
  {
3160
- for: "maxPages",
3161
- class: "block text-sm font-medium text-gray-700 dark:text-gray-300",
3162
- children: "Max Pages"
4081
+ type: "number",
4082
+ name: "maxPages",
4083
+ id: "maxPages",
4084
+ min: "1",
4085
+ placeholder: "1000",
4086
+ class: "mt-0.5 block w-full max-w-sm px-2 py-1 border border-gray-300 dark:border-gray-600 rounded-md shadow-sm focus:outline-none focus:ring-indigo-500 focus:border-indigo-500 sm:text-sm bg-white dark:bg-gray-700 text-gray-900 dark:text-white"
3163
4087
  }
3164
- ),
3165
- /* @__PURE__ */ jsx(Tooltip, { text: "The maximum number of pages to scrape. Default is 1000. Setting this too high may result in longer processing times." })
4088
+ )
3166
4089
  ] }),
3167
- /* @__PURE__ */ jsx(
3168
- "input",
3169
- {
3170
- type: "number",
3171
- name: "maxPages",
3172
- id: "maxPages",
3173
- min: "1",
3174
- placeholder: "1000",
3175
- class: "mt-0.5 block w-full max-w-sm px-2 py-1 border border-gray-300 dark:border-gray-600 rounded-md shadow-sm focus:outline-none focus:ring-indigo-500 focus:border-indigo-500 sm:text-sm bg-white dark:bg-gray-700 text-gray-900 dark:text-white"
3176
- }
3177
- )
3178
- ] }),
3179
- /* @__PURE__ */ jsxs("div", { children: [
3180
- /* @__PURE__ */ jsxs("div", { class: "flex items-center", children: [
4090
+ /* @__PURE__ */ jsxs("div", { children: [
4091
+ /* @__PURE__ */ jsxs("div", { class: "flex items-center", children: [
4092
+ /* @__PURE__ */ jsx(
4093
+ "label",
4094
+ {
4095
+ for: "maxDepth",
4096
+ class: "block text-sm font-medium text-gray-700 dark:text-gray-300",
4097
+ children: "Max Depth"
4098
+ }
4099
+ ),
4100
+ /* @__PURE__ */ jsx(Tooltip, { text: "How many links deep the scraper should follow. Default is 3. Higher values capture more content but increase processing time." })
4101
+ ] }),
3181
4102
  /* @__PURE__ */ jsx(
3182
- "label",
4103
+ "input",
3183
4104
  {
3184
- for: "maxDepth",
3185
- class: "block text-sm font-medium text-gray-700 dark:text-gray-300",
3186
- children: "Max Depth"
4105
+ type: "number",
4106
+ name: "maxDepth",
4107
+ id: "maxDepth",
4108
+ min: "0",
4109
+ placeholder: "3",
4110
+ class: "mt-0.5 block w-full max-w-sm px-2 py-1 border border-gray-300 dark:border-gray-600 rounded-md shadow-sm focus:outline-none focus:ring-indigo-500 focus:border-indigo-500 sm:text-sm bg-white dark:bg-gray-700 text-gray-900 dark:text-white"
3187
4111
  }
3188
- ),
3189
- /* @__PURE__ */ jsx(Tooltip, { text: "How many links deep the scraper should follow. Default is 3. Higher values capture more content but increase processing time." })
4112
+ )
3190
4113
  ] }),
3191
- /* @__PURE__ */ jsx(
3192
- "input",
3193
- {
3194
- type: "number",
3195
- name: "maxDepth",
3196
- id: "maxDepth",
3197
- min: "0",
3198
- placeholder: "3",
3199
- class: "mt-0.5 block w-full max-w-sm px-2 py-1 border border-gray-300 dark:border-gray-600 rounded-md shadow-sm focus:outline-none focus:ring-indigo-500 focus:border-indigo-500 sm:text-sm bg-white dark:bg-gray-700 text-gray-900 dark:text-white"
3200
- }
3201
- )
3202
- ] }),
3203
- /* @__PURE__ */ jsxs("div", { children: [
3204
- /* @__PURE__ */ jsxs("div", { class: "flex items-center", children: [
3205
- /* @__PURE__ */ jsx(
3206
- "label",
4114
+ /* @__PURE__ */ jsxs("div", { children: [
4115
+ /* @__PURE__ */ jsxs("div", { class: "flex items-center", children: [
4116
+ /* @__PURE__ */ jsx(
4117
+ "label",
4118
+ {
4119
+ for: "scope",
4120
+ class: "block text-sm font-medium text-gray-700 dark:text-gray-300",
4121
+ children: "Scope"
4122
+ }
4123
+ ),
4124
+ /* @__PURE__ */ jsx(
4125
+ Tooltip,
4126
+ {
4127
+ text: /* @__PURE__ */ jsxs("div", { children: [
4128
+ "Controls which pages are scraped:",
4129
+ /* @__PURE__ */ jsxs("ul", { class: "list-disc pl-5", children: [
4130
+ /* @__PURE__ */ jsx("li", { children: "'Subpages' only scrapes under the given URL path," }),
4131
+ /* @__PURE__ */ jsx("li", { children: "'Hostname' scrapes all content on the same host (e.g., all of docs.example.com)," }),
4132
+ /* @__PURE__ */ jsx("li", { children: "'Domain' scrapes all content on the domain and its subdomains (e.g., all of example.com)." })
4133
+ ] })
4134
+ ] })
4135
+ }
4136
+ )
4137
+ ] }),
4138
+ /* @__PURE__ */ jsxs(
4139
+ "select",
3207
4140
  {
3208
- for: "scope",
3209
- class: "block text-sm font-medium text-gray-700 dark:text-gray-300",
3210
- children: "Scope"
4141
+ name: "scope",
4142
+ id: "scope",
4143
+ class: "mt-0.5 block w-full max-w-sm pl-2 pr-10 py-1 text-base border border-gray-300 dark:border-gray-600 focus:outline-none focus:ring-indigo-500 focus:border-indigo-500 sm:text-sm rounded-md bg-white dark:bg-gray-700 text-gray-900 dark:text-white",
4144
+ children: [
4145
+ /* @__PURE__ */ jsx("option", { value: "subpages", selected: true, children: "Subpages (Default)" }),
4146
+ /* @__PURE__ */ jsx("option", { value: "hostname", children: "Hostname" }),
4147
+ /* @__PURE__ */ jsx("option", { value: "domain", children: "Domain" })
4148
+ ]
3211
4149
  }
3212
- ),
4150
+ )
4151
+ ] }),
4152
+ /* @__PURE__ */ jsxs("div", { children: [
4153
+ /* @__PURE__ */ jsxs("div", { class: "flex items-center", children: [
4154
+ /* @__PURE__ */ jsx(
4155
+ "label",
4156
+ {
4157
+ for: "includePatterns",
4158
+ class: "block text-sm font-medium text-gray-700 dark:text-gray-300",
4159
+ children: "Include Patterns"
4160
+ }
4161
+ ),
4162
+ /* @__PURE__ */ jsx(Tooltip, { text: "Glob or regex patterns for URLs to include. One per line or comma-separated. Regex patterns must be wrapped in slashes, e.g. /pattern/." })
4163
+ ] }),
3213
4164
  /* @__PURE__ */ jsx(
3214
- Tooltip,
4165
+ "textarea",
3215
4166
  {
3216
- text: /* @__PURE__ */ jsxs("div", { children: [
3217
- "Controls which pages are scraped:",
3218
- /* @__PURE__ */ jsxs("ul", { class: "list-disc pl-5", children: [
3219
- /* @__PURE__ */ jsx("li", { children: "'Subpages' only scrapes under the given URL path," }),
3220
- /* @__PURE__ */ jsx("li", { children: "'Hostname' scrapes all content on the same host (e.g., all of docs.example.com)," }),
3221
- /* @__PURE__ */ jsx("li", { children: "'Domain' scrapes all content on the domain and its subdomains (e.g., all of example.com)." })
3222
- ] })
3223
- ] })
4167
+ name: "includePatterns",
4168
+ id: "includePatterns",
4169
+ rows: "2",
4170
+ placeholder: "e.g. docs/* or /api\\/v1.*/",
4171
+ class: "mt-0.5 block w-full max-w-sm px-2 py-1 border border-gray-300 dark:border-gray-600 rounded-md shadow-sm focus:outline-none focus:ring-indigo-500 focus:border-indigo-500 sm:text-sm bg-white dark:bg-gray-700 text-gray-900 dark:text-white"
3224
4172
  }
3225
4173
  )
3226
4174
  ] }),
3227
- /* @__PURE__ */ jsxs(
3228
- "select",
3229
- {
3230
- name: "scope",
3231
- id: "scope",
3232
- class: "mt-0.5 block w-full max-w-sm pl-2 pr-10 py-1 text-base border border-gray-300 dark:border-gray-600 focus:outline-none focus:ring-indigo-500 focus:border-indigo-500 sm:text-sm rounded-md bg-white dark:bg-gray-700 text-gray-900 dark:text-white",
3233
- children: [
3234
- /* @__PURE__ */ jsx("option", { value: "subpages", selected: true, children: "Subpages (Default)" }),
3235
- /* @__PURE__ */ jsx("option", { value: "hostname", children: "Hostname" }),
3236
- /* @__PURE__ */ jsx("option", { value: "domain", children: "Domain" })
3237
- ]
3238
- }
3239
- )
3240
- ] }),
3241
- /* @__PURE__ */ jsxs("div", { children: [
3242
- /* @__PURE__ */ jsxs("div", { class: "flex items-center", children: [
4175
+ /* @__PURE__ */ jsxs("div", { children: [
4176
+ /* @__PURE__ */ jsxs("div", { class: "flex items-center", children: [
4177
+ /* @__PURE__ */ jsx(
4178
+ "label",
4179
+ {
4180
+ for: "excludePatterns",
4181
+ class: "block text-sm font-medium text-gray-700 dark:text-gray-300",
4182
+ children: "Exclude Patterns"
4183
+ }
4184
+ ),
4185
+ /* @__PURE__ */ jsx(Tooltip, { text: "Glob or regex patterns for URLs to exclude. One per line or comma-separated. Exclude takes precedence over include. Regex patterns must be wrapped in slashes, e.g. /pattern/. Edit or clear this field to customize exclusions." })
4186
+ ] }),
3243
4187
  /* @__PURE__ */ jsx(
3244
- "label",
4188
+ "textarea",
3245
4189
  {
3246
- for: "includePatterns",
3247
- class: "block text-sm font-medium text-gray-700 dark:text-gray-300",
3248
- children: "Include Patterns"
4190
+ name: "excludePatterns",
4191
+ id: "excludePatterns",
4192
+ rows: "5",
4193
+ class: "mt-0.5 block w-full max-w-sm px-2 py-1 border border-gray-300 dark:border-gray-600 rounded-md shadow-sm focus:outline-none focus:ring-indigo-500 focus:border-indigo-500 sm:text-sm bg-white dark:bg-gray-700 text-gray-900 dark:text-white font-mono text-xs",
4194
+ children: defaultExcludePatternsText
3249
4195
  }
3250
4196
  ),
3251
- /* @__PURE__ */ jsx(Tooltip, { text: "Glob or regex patterns for URLs to include. One per line or comma-separated. Regex patterns must be wrapped in slashes, e.g. /pattern/." })
4197
+ /* @__PURE__ */ jsx("p", { class: "mt-1 text-xs text-gray-500 dark:text-gray-400", children: "Default patterns are pre-filled. Edit to customize or clear to exclude nothing." })
3252
4198
  ] }),
3253
- /* @__PURE__ */ jsx(
3254
- "textarea",
3255
- {
3256
- name: "includePatterns",
3257
- id: "includePatterns",
3258
- rows: "2",
3259
- placeholder: "e.g. docs/* or /api\\/v1.*/",
3260
- class: "mt-0.5 block w-full max-w-sm px-2 py-1 border border-gray-300 dark:border-gray-600 rounded-md shadow-sm focus:outline-none focus:ring-indigo-500 focus:border-indigo-500 sm:text-sm bg-white dark:bg-gray-700 text-gray-900 dark:text-white"
3261
- }
3262
- )
3263
- ] }),
3264
- /* @__PURE__ */ jsxs("div", { children: [
3265
- /* @__PURE__ */ jsxs("div", { class: "flex items-center", children: [
3266
- /* @__PURE__ */ jsx(
3267
- "label",
4199
+ /* @__PURE__ */ jsxs("div", { children: [
4200
+ /* @__PURE__ */ jsxs("div", { class: "flex items-center", children: [
4201
+ /* @__PURE__ */ jsx(
4202
+ "label",
4203
+ {
4204
+ for: "scrapeMode",
4205
+ class: "block text-sm font-medium text-gray-700 dark:text-gray-300",
4206
+ children: "Scrape Mode"
4207
+ }
4208
+ ),
4209
+ /* @__PURE__ */ jsx(
4210
+ Tooltip,
4211
+ {
4212
+ text: /* @__PURE__ */ jsx("div", { children: /* @__PURE__ */ jsxs("ul", { class: "list-disc pl-5", children: [
4213
+ /* @__PURE__ */ jsx("li", { children: "'Auto' automatically selects the best method," }),
4214
+ /* @__PURE__ */ jsx("li", { children: "'Fetch' uses simple HTTP requests (faster but may miss dynamic content)," }),
4215
+ /* @__PURE__ */ jsx("li", { children: "'Playwright' uses a headless browser (slower but better for JS-heavy sites)." })
4216
+ ] }) })
4217
+ }
4218
+ )
4219
+ ] }),
4220
+ /* @__PURE__ */ jsxs(
4221
+ "select",
3268
4222
  {
3269
- for: "excludePatterns",
3270
- class: "block text-sm font-medium text-gray-700 dark:text-gray-300",
3271
- children: "Exclude Patterns"
4223
+ name: "scrapeMode",
4224
+ id: "scrapeMode",
4225
+ class: "mt-0.5 block w-full max-w-sm pl-2 pr-10 py-1 text-base border border-gray-300 dark:border-gray-600 focus:outline-none focus:ring-indigo-500 focus:border-indigo-500 sm:text-sm rounded-md bg-white dark:bg-gray-700 text-gray-900 dark:text-white",
4226
+ children: [
4227
+ /* @__PURE__ */ jsx("option", { value: ScrapeMode.Auto, selected: true, children: "Auto (Default)" }),
4228
+ /* @__PURE__ */ jsx("option", { value: ScrapeMode.Fetch, children: "Fetch" }),
4229
+ /* @__PURE__ */ jsx("option", { value: ScrapeMode.Playwright, children: "Playwright" })
4230
+ ]
3272
4231
  }
3273
- ),
3274
- /* @__PURE__ */ jsx(Tooltip, { text: "Glob or regex patterns for URLs to exclude. One per line or comma-separated. Exclude takes precedence over include. Regex patterns must be wrapped in slashes, e.g. /pattern/." })
4232
+ )
4233
+ ] }),
4234
+ /* @__PURE__ */ jsxs("div", { children: [
4235
+ /* @__PURE__ */ jsxs("div", { class: "flex items-center mb-1", children: [
4236
+ /* @__PURE__ */ jsx("label", { class: "block text-sm font-medium text-gray-700 dark:text-gray-300", children: "Custom HTTP Headers" }),
4237
+ /* @__PURE__ */ jsx(Tooltip, { text: "Add custom HTTP headers (e.g., for authentication). These will be sent with every HTTP request." })
4238
+ ] }),
4239
+ /* @__PURE__ */ jsxs("div", { children: [
4240
+ /* @__PURE__ */ jsx("template", { "x-for": "(header, idx) in headers", children: /* @__PURE__ */ jsxs("div", { class: "flex space-x-2 mb-1", children: [
4241
+ /* @__PURE__ */ jsx(
4242
+ "input",
4243
+ {
4244
+ type: "text",
4245
+ class: "w-1/3 px-2 py-1 border border-gray-300 dark:border-gray-600 rounded-md bg-white dark:bg-gray-700 text-gray-900 dark:text-white text-xs",
4246
+ placeholder: "Header Name",
4247
+ "x-model": "header.name",
4248
+ required: true
4249
+ }
4250
+ ),
4251
+ /* @__PURE__ */ jsx("span", { class: "text-gray-500", children: ":" }),
4252
+ /* @__PURE__ */ jsx(
4253
+ "input",
4254
+ {
4255
+ type: "text",
4256
+ class: "w-1/2 px-2 py-1 border border-gray-300 dark:border-gray-600 rounded-md bg-white dark:bg-gray-700 text-gray-900 dark:text-white text-xs",
4257
+ placeholder: "Header Value",
4258
+ "x-model": "header.value",
4259
+ required: true
4260
+ }
4261
+ ),
4262
+ /* @__PURE__ */ jsx(
4263
+ "button",
4264
+ {
4265
+ type: "button",
4266
+ class: "text-red-500 hover:text-red-700 text-xs",
4267
+ "x-on:click": "headers.splice(idx, 1)",
4268
+ children: "Remove"
4269
+ }
4270
+ ),
4271
+ /* @__PURE__ */ jsx(
4272
+ "input",
4273
+ {
4274
+ type: "hidden",
4275
+ name: "header[]",
4276
+ "x-bind:value": "header.name && header.value ? header.name + ':' + header.value : ''"
4277
+ }
4278
+ )
4279
+ ] }) }),
4280
+ /* @__PURE__ */ jsx(
4281
+ "button",
4282
+ {
4283
+ type: "button",
4284
+ class: "mt-1 px-2 py-0.5 bg-indigo-100 dark:bg-indigo-900 text-indigo-700 dark:text-indigo-200 rounded text-xs",
4285
+ "x-on:click": "headers.push({ name: '', value: '' })",
4286
+ children: "+ Add Header"
4287
+ }
4288
+ )
4289
+ ] })
3275
4290
  ] }),
3276
- /* @__PURE__ */ jsx(
3277
- "textarea",
3278
- {
3279
- name: "excludePatterns",
3280
- id: "excludePatterns",
3281
- rows: "2",
3282
- placeholder: "e.g. private/* or /internal/",
3283
- class: "mt-0.5 block w-full max-w-sm px-2 py-1 border border-gray-300 dark:border-gray-600 rounded-md shadow-sm focus:outline-none focus:ring-indigo-500 focus:border-indigo-500 sm:text-sm bg-white dark:bg-gray-700 text-gray-900 dark:text-white"
3284
- }
3285
- )
3286
- ] }),
3287
- /* @__PURE__ */ jsxs("div", { children: [
3288
4291
  /* @__PURE__ */ jsxs("div", { class: "flex items-center", children: [
3289
4292
  /* @__PURE__ */ jsx(
3290
- "label",
4293
+ "input",
3291
4294
  {
3292
- for: "scrapeMode",
3293
- class: "block text-sm font-medium text-gray-700 dark:text-gray-300",
3294
- children: "Scrape Mode"
4295
+ id: "followRedirects",
4296
+ name: "followRedirects",
4297
+ type: "checkbox",
4298
+ checked: true,
4299
+ class: "h-4 w-4 text-indigo-600 focus:ring-indigo-500 border-gray-300 dark:border-gray-600 rounded bg-white dark:bg-gray-700"
3295
4300
  }
3296
4301
  ),
3297
4302
  /* @__PURE__ */ jsx(
3298
- Tooltip,
4303
+ "label",
3299
4304
  {
3300
- text: /* @__PURE__ */ jsx("div", { children: /* @__PURE__ */ jsxs("ul", { class: "list-disc pl-5", children: [
3301
- /* @__PURE__ */ jsx("li", { children: "'Auto' automatically selects the best method," }),
3302
- /* @__PURE__ */ jsx("li", { children: "'Fetch' uses simple HTTP requests (faster but may miss dynamic content)," }),
3303
- /* @__PURE__ */ jsx("li", { children: "'Playwright' uses a headless browser (slower but better for JS-heavy sites)." })
3304
- ] }) })
4305
+ for: "followRedirects",
4306
+ class: "ml-1 block text-sm text-gray-900 dark:text-gray-300",
4307
+ children: "Follow Redirects"
3305
4308
  }
3306
4309
  )
3307
4310
  ] }),
3308
- /* @__PURE__ */ jsxs(
3309
- "select",
3310
- {
3311
- name: "scrapeMode",
3312
- id: "scrapeMode",
3313
- class: "mt-0.5 block w-full max-w-sm pl-2 pr-10 py-1 text-base border border-gray-300 dark:border-gray-600 focus:outline-none focus:ring-indigo-500 focus:border-indigo-500 sm:text-sm rounded-md bg-white dark:bg-gray-700 text-gray-900 dark:text-white",
3314
- children: [
3315
- /* @__PURE__ */ jsx("option", { value: ScrapeMode.Auto, selected: true, children: "Auto (Default)" }),
3316
- /* @__PURE__ */ jsx("option", { value: ScrapeMode.Fetch, children: "Fetch" }),
3317
- /* @__PURE__ */ jsx("option", { value: ScrapeMode.Playwright, children: "Playwright" })
3318
- ]
3319
- }
3320
- )
3321
- ] }),
3322
- /* @__PURE__ */ jsxs("div", { children: [
3323
- /* @__PURE__ */ jsxs("div", { class: "flex items-center mb-1", children: [
3324
- /* @__PURE__ */ jsx("label", { class: "block text-sm font-medium text-gray-700 dark:text-gray-300", children: "Custom HTTP Headers" }),
3325
- /* @__PURE__ */ jsx(Tooltip, { text: "Add custom HTTP headers (e.g., for authentication). These will be sent with every HTTP request." })
3326
- ] }),
3327
- /* @__PURE__ */ jsxs("div", { children: [
3328
- /* @__PURE__ */ jsx("template", { "x-for": "(header, idx) in headers", children: /* @__PURE__ */ jsxs("div", { class: "flex space-x-2 mb-1", children: [
3329
- /* @__PURE__ */ jsx(
3330
- "input",
3331
- {
3332
- type: "text",
3333
- class: "w-1/3 px-2 py-1 border border-gray-300 dark:border-gray-600 rounded-md bg-white dark:bg-gray-700 text-gray-900 dark:text-white text-xs",
3334
- placeholder: "Header Name",
3335
- "x-model": "header.name",
3336
- required: true
3337
- }
3338
- ),
3339
- /* @__PURE__ */ jsx("span", { class: "text-gray-500", children: ":" }),
3340
- /* @__PURE__ */ jsx(
3341
- "input",
3342
- {
3343
- type: "text",
3344
- class: "w-1/2 px-2 py-1 border border-gray-300 dark:border-gray-600 rounded-md bg-white dark:bg-gray-700 text-gray-900 dark:text-white text-xs",
3345
- placeholder: "Header Value",
3346
- "x-model": "header.value",
3347
- required: true
3348
- }
3349
- ),
3350
- /* @__PURE__ */ jsx(
3351
- "button",
3352
- {
3353
- type: "button",
3354
- class: "text-red-500 hover:text-red-700 text-xs",
3355
- "x-on:click": "headers.splice(idx, 1)",
3356
- children: "Remove"
3357
- }
3358
- ),
3359
- /* @__PURE__ */ jsx(
3360
- "input",
3361
- {
3362
- type: "hidden",
3363
- name: "header[]",
3364
- "x-bind:value": "header.name && header.value ? header.name + ':' + header.value : ''"
3365
- }
3366
- )
3367
- ] }) }),
4311
+ /* @__PURE__ */ jsxs("div", { class: "flex items-center", children: [
4312
+ /* @__PURE__ */ jsx(
4313
+ "input",
4314
+ {
4315
+ id: "ignoreErrors",
4316
+ name: "ignoreErrors",
4317
+ type: "checkbox",
4318
+ checked: true,
4319
+ class: "h-4 w-4 text-indigo-600 focus:ring-indigo-500 border-gray-300 dark:border-gray-600 rounded bg-white dark:bg-gray-700"
4320
+ }
4321
+ ),
3368
4322
  /* @__PURE__ */ jsx(
3369
- "button",
4323
+ "label",
3370
4324
  {
3371
- type: "button",
3372
- class: "mt-1 px-2 py-0.5 bg-indigo-100 dark:bg-indigo-900 text-indigo-700 dark:text-indigo-200 rounded text-xs",
3373
- "x-on:click": "headers.push({ name: '', value: '' })",
3374
- children: "+ Add Header"
4325
+ for: "ignoreErrors",
4326
+ class: "ml-1 block text-sm text-gray-900 dark:text-gray-300",
4327
+ children: "Ignore Errors During Scraping"
3375
4328
  }
3376
4329
  )
3377
4330
  ] })
3378
- ] }),
3379
- /* @__PURE__ */ jsxs("div", { class: "flex items-center", children: [
3380
- /* @__PURE__ */ jsx(
3381
- "input",
3382
- {
3383
- id: "followRedirects",
3384
- name: "followRedirects",
3385
- type: "checkbox",
3386
- checked: true,
3387
- class: "h-4 w-4 text-indigo-600 focus:ring-indigo-500 border-gray-300 dark:border-gray-600 rounded bg-white dark:bg-gray-700"
3388
- }
3389
- ),
3390
- /* @__PURE__ */ jsx(
3391
- "label",
3392
- {
3393
- for: "followRedirects",
3394
- class: "ml-1 block text-sm text-gray-900 dark:text-gray-300",
3395
- children: "Follow Redirects"
3396
- }
3397
- )
3398
- ] }),
3399
- /* @__PURE__ */ jsxs("div", { class: "flex items-center", children: [
3400
- /* @__PURE__ */ jsx(
3401
- "input",
3402
- {
3403
- id: "ignoreErrors",
3404
- name: "ignoreErrors",
3405
- type: "checkbox",
3406
- checked: true,
3407
- class: "h-4 w-4 text-indigo-600 focus:ring-indigo-500 border-gray-300 dark:border-gray-600 rounded bg-white dark:bg-gray-700"
3408
- }
3409
- ),
3410
- /* @__PURE__ */ jsx(
3411
- "label",
3412
- {
3413
- for: "ignoreErrors",
3414
- class: "ml-1 block text-sm text-gray-900 dark:text-gray-300",
3415
- children: "Ignore Errors During Scraping"
3416
- }
3417
- )
3418
4331
  ] })
3419
- ] })
3420
- ] }),
3421
- /* @__PURE__ */ jsx("div", { children: /* @__PURE__ */ jsx(
3422
- "button",
3423
- {
3424
- type: "submit",
3425
- class: "w-full flex justify-center py-1.5 px-3 border border-transparent rounded-md shadow-sm text-sm font-medium text-white bg-indigo-600 hover:bg-indigo-700 focus:outline-none focus:ring-2 focus:ring-offset-2 focus:ring-indigo-500",
3426
- children: "Queue Job"
3427
- }
3428
- ) })
3429
- ]
3430
- }
3431
- ),
3432
- /* @__PURE__ */ jsx("div", { id: "job-response", class: "mt-2 text-sm" })
3433
- ] });
3434
- const ScrapeForm = () => /* @__PURE__ */ jsx("div", { id: "scrape-form-container", children: /* @__PURE__ */ jsx(ScrapeFormContent, {}) });
4332
+ ] }),
4333
+ /* @__PURE__ */ jsx("div", { children: /* @__PURE__ */ jsx(
4334
+ "button",
4335
+ {
4336
+ type: "submit",
4337
+ class: "w-full flex justify-center py-1.5 px-3 border border-transparent rounded-md shadow-sm text-sm font-medium text-white bg-indigo-600 hover:bg-indigo-700 focus:outline-none focus:ring-2 focus:ring-offset-2 focus:ring-indigo-500",
4338
+ children: "Queue Job"
4339
+ }
4340
+ ) })
4341
+ ]
4342
+ }
4343
+ ),
4344
+ /* @__PURE__ */ jsx("div", { id: "job-response", class: "mt-2 text-sm" })
4345
+ ] });
4346
+ };
4347
/**
 * Wraps the scrape form in its `#scrape-form-container` element.
 *
 * @param {object} props
 * @param {string[]} [props.defaultExcludePatterns] - Patterns forwarded
 *   unchanged to ScrapeFormContent.
 * @returns The rendered container element produced by `jsx`.
 */
const ScrapeForm = ({ defaultExcludePatterns }) => {
  // Build the inner form first, then place it inside the container div.
  const formContent = /* @__PURE__ */ jsx(ScrapeFormContent, { defaultExcludePatterns });
  return /* @__PURE__ */ jsx("div", { id: "scrape-form-container", children: formContent });
};
4348
/**
 * Default glob patterns for individual files to exclude.
 * Grouped per file family below; the flattened order is significant only in
 * that it is the order shown wherever the list is rendered.
 */
const DEFAULT_FILE_EXCLUSIONS = [
  // CHANGELOG files (case variations)
  ["**/CHANGELOG.md", "**/changelog.md", "**/CHANGELOG.mdx", "**/changelog.mdx"],
  // LICENSE files (case variations)
  ["**/LICENSE", "**/LICENSE.md", "**/license.md"],
  // CODE_OF_CONDUCT files (case variations)
  ["**/CODE_OF_CONDUCT.md", "**/code_of_conduct.md"]
].flat();
4362
/**
 * Default glob patterns for whole folders to exclude.
 * Each inner array is one category; `.flat()` yields a single list in the
 * same order as the categories are written.
 */
const DEFAULT_FOLDER_EXCLUSIONS = [
  // Archive and deprecated content (matches anywhere in path)
  [
    "**/archive/**",
    "**/archived/**",
    "**/deprecated/**",
    "**/legacy/**",
    "**/old/**",
    "**/outdated/**",
    "**/previous/**",
    "**/superseded/**"
  ],
  // Specific paths that don't follow the general pattern
  // NOTE(review): may already be covered by "**/old/**" depending on the glob matcher in use; confirm before removing.
  ["docs/old/**"],
  // Internationalization folders - non-English locales
  [
    "**/i18n/ar*/**",
    "**/i18n/de*/**",
    "**/i18n/es*/**",
    "**/i18n/fr*/**",
    "**/i18n/hi*/**",
    "**/i18n/it*/**",
    "**/i18n/ja*/**",
    "**/i18n/ko*/**",
    "**/i18n/nl*/**",
    "**/i18n/pl*/**",
    "**/i18n/pt*/**",
    "**/i18n/ru*/**",
    "**/i18n/sv*/**",
    "**/i18n/th*/**",
    "**/i18n/tr*/**",
    "**/i18n/vi*/**",
    "**/i18n/zh*/**"
  ],
  // Common locale folder patterns
  ["**/zh-cn/**", "**/zh-hk/**", "**/zh-mo/**", "**/zh-sg/**", "**/zh-tw/**"]
].flat();
4399
/**
 * Combined default exclusion list: all file patterns followed by all folder
 * patterns, as a new array separate from the two source lists.
 */
const DEFAULT_EXCLUSION_PATTERNS = DEFAULT_FILE_EXCLUSIONS.concat(DEFAULT_FOLDER_EXCLUSIONS);
4403
/**
 * Resolves which exclusion patterns to apply.
 *
 * Only `undefined` selects the defaults. Any other value, including an empty
 * array or `null`, is returned as-is.
 *
 * @param {string[] | undefined} userPatterns - Patterns supplied by the caller, if any.
 * @returns The caller's value when one was given, otherwise DEFAULT_EXCLUSION_PATTERNS.
 */
function getEffectiveExclusionPatterns(userPatterns) {
  return userPatterns === undefined ? DEFAULT_EXCLUSION_PATTERNS : userPatterns;
}
3435
4409
  function registerNewJobRoutes(server, scrapeTool) {
3436
4410
  server.get("/web/jobs/new", async () => {
3437
- return /* @__PURE__ */ jsx(ScrapeForm, {});
4411
+ return /* @__PURE__ */ jsx(ScrapeForm, { defaultExcludePatterns: DEFAULT_EXCLUSION_PATTERNS });
3438
4412
  });
3439
4413
  server.post(
3440
4414
  "/web/jobs/scrape",
@@ -3505,7 +4479,7 @@ function registerNewJobRoutes(server, scrapeTool) {
3505
4479
  ] })
3506
4480
  }
3507
4481
  ),
3508
- /* @__PURE__ */ jsx("div", { id: "scrape-form-container", "hx-swap-oob": "innerHTML", children: /* @__PURE__ */ jsx(ScrapeFormContent, {}) })
4482
+ /* @__PURE__ */ jsx("div", { id: "scrape-form-container", "hx-swap-oob": "innerHTML", children: /* @__PURE__ */ jsx(ScrapeFormContent, { defaultExcludePatterns: DEFAULT_EXCLUSION_PATTERNS }) })
3509
4483
  ] });
3510
4484
  }
3511
4485
  return /* @__PURE__ */ jsx(Alert, { type: "warning", message: "Job finished unexpectedly quickly." });
@@ -3924,16 +4898,59 @@ async function registerWorkerService(pipeline) {
3924
4898
  pipeline.setCallbacks({
3925
4899
  onJobProgress: async (job, progress) => {
3926
4900
  logger.debug(
3927
- `📊 Job ${job.id} progress: ${progress.pagesScraped}/${progress.totalPages} pages`
4901
+ `Job ${job.id} progress: ${progress.pagesScraped}/${progress.totalPages} pages`
3928
4902
  );
4903
+ analytics.track(TelemetryEvent.PIPELINE_JOB_PROGRESS, {
4904
+ jobId: job.id,
4905
+ // Job IDs are already anonymous
4906
+ library: job.library,
4907
+ pagesScraped: progress.pagesScraped,
4908
+ totalPages: progress.totalPages,
4909
+ totalDiscovered: progress.totalDiscovered,
4910
+ progressPercent: Math.round(progress.pagesScraped / progress.totalPages * 100),
4911
+ currentDepth: progress.depth,
4912
+ maxDepth: progress.maxDepth,
4913
+ discoveryRatio: Math.round(
4914
+ progress.totalDiscovered / progress.totalPages * 100
4915
+ ),
4916
+ // How much we discovered vs limited total
4917
+ queue_efficiency: progress.totalPages > 0 ? Math.round(progress.pagesScraped / progress.totalPages * 100) : 0
4918
+ });
3929
4919
  },
3930
4920
  onJobStatusChange: async (job) => {
3931
- logger.debug(`🔄 Job ${job.id} status changed to: ${job.status}`);
4921
+ logger.debug(`Job ${job.id} status changed to: ${job.status}`);
4922
+ const duration = job.startedAt ? Date.now() - job.startedAt.getTime() : null;
4923
+ const queueWaitTime = job.startedAt && job.createdAt ? job.startedAt.getTime() - job.createdAt.getTime() : null;
4924
+ analytics.track(TelemetryEvent.PIPELINE_JOB_COMPLETED, {
4925
+ jobId: job.id,
4926
+ // Job IDs are already anonymous
4927
+ library: job.library,
4928
+ status: job.status,
4929
+ duration_ms: duration,
4930
+ queue_wait_time_ms: queueWaitTime,
4931
+ pages_processed: job.progressPages || 0,
4932
+ max_pages_configured: job.progressMaxPages || 0,
4933
+ has_version: !!job.version,
4934
+ has_error: !!job.error,
4935
+ throughput_pages_per_second: duration && job.progressPages ? Math.round(job.progressPages / duration * 1e3) : 0
4936
+ });
3932
4937
  },
3933
4938
  onJobError: async (job, error, document) => {
3934
4939
  logger.warn(
3935
4940
  `⚠️ Job ${job.id} error ${document ? `on document ${document.metadata.url}` : ""}: ${error.message}`
3936
4941
  );
4942
+ const errorInfo = sanitizeError(error);
4943
+ analytics.track(TelemetryEvent.ERROR_OCCURRED, {
4944
+ jobId: job.id,
4945
+ // Job IDs are already anonymous
4946
+ library: job.library,
4947
+ errorType: errorInfo.type,
4948
+ errorMessage: errorInfo.message,
4949
+ hasDocument: !!document,
4950
+ stage: document ? "document_processing" : "job_setup",
4951
+ hasStack: errorInfo.hasStack,
4952
+ pages_processed_before_error: job.progressPages || 0
4953
+ });
3937
4954
  }
3938
4955
  });
3939
4956
  await pipeline.start();
@@ -3952,7 +4969,7 @@ function getProjectRoot() {
3952
4969
  let currentDir = path.dirname(currentFilePath);
3953
4970
  while (true) {
3954
4971
  const packageJsonPath = path.join(currentDir, "package.json");
3955
- if (fs$1.existsSync(packageJsonPath)) {
4972
+ if (fs.existsSync(packageJsonPath)) {
3956
4973
  projectRoot = currentDir;
3957
4974
  return projectRoot;
3958
4975
  }
@@ -3975,6 +4992,7 @@ class AppServer {
3975
4992
  }
3976
4993
  server;
3977
4994
  mcpServer = null;
4995
+ authManager = null;
3978
4996
  config;
3979
4997
  /**
3980
4998
  * Validate the server configuration for invalid service combinations.
@@ -4005,15 +5023,48 @@ class AppServer {
4005
5023
  */
4006
5024
  async start() {
4007
5025
  this.validateConfig();
5026
+ if (this.config.telemetry !== false && shouldEnableTelemetry()) {
5027
+ try {
5028
+ telemetryService.startSession({
5029
+ sessionId: crypto.randomUUID(),
5030
+ interface: "web",
5031
+ startTime: /* @__PURE__ */ new Date(),
5032
+ version: process.env.npm_package_version || "unknown",
5033
+ platform: process.platform,
5034
+ servicesEnabled: this.getActiveServicesList(),
5035
+ authEnabled: Boolean(this.config.auth),
5036
+ readOnly: Boolean(this.config.readOnly)
5037
+ });
5038
+ } catch (error) {
5039
+ logger.debug(`Failed to initialize telemetry: ${error}`);
5040
+ }
5041
+ }
4008
5042
  await this.setupServer();
4009
5043
  try {
5044
+ const startupStartTime = performance.now();
4010
5045
  const address = await this.server.listen({
4011
5046
  port: this.config.port,
4012
5047
  host: "0.0.0.0"
4013
5048
  });
5049
+ const startupDuration = performance.now() - startupStartTime;
5050
+ if (analytics.isEnabled()) {
5051
+ analytics.track(TelemetryEvent.APP_STARTED, {
5052
+ startup_success: true,
5053
+ startup_duration_ms: Math.round(startupDuration),
5054
+ listen_address: address,
5055
+ active_services: this.getActiveServicesList()
5056
+ });
5057
+ }
4014
5058
  this.logStartupInfo(address);
4015
5059
  return this.server;
4016
5060
  } catch (error) {
5061
+ if (analytics.isEnabled()) {
5062
+ analytics.track(TelemetryEvent.APP_STARTED, {
5063
+ startup_success: false,
5064
+ error_type: error instanceof Error ? error.constructor.name : "UnknownError",
5065
+ error_message: error instanceof Error ? error.message : String(error)
5066
+ });
5067
+ }
4017
5068
  logger.error(`❌ Failed to start AppServer: ${error}`);
4018
5069
  await this.server.close();
4019
5070
  throw error;
@@ -4024,24 +5075,121 @@ class AppServer {
4024
5075
  */
4025
5076
  async stop() {
4026
5077
  try {
5078
+ if (analytics.isEnabled()) {
5079
+ analytics.track(TelemetryEvent.APP_SHUTDOWN, {
5080
+ graceful: true
5081
+ });
5082
+ }
4027
5083
  if (this.config.enableWorker) {
4028
5084
  await stopWorkerService(this.pipeline);
4029
5085
  }
4030
5086
  if (this.mcpServer) {
4031
5087
  await cleanupMcpService(this.mcpServer);
4032
5088
  }
5089
+ telemetryService.endSession();
5090
+ await telemetryService.shutdown();
4033
5091
  await this.server.close();
4034
5092
  logger.info("🛑 AppServer stopped");
4035
5093
  } catch (error) {
4036
5094
  logger.error(`❌ Failed to stop AppServer gracefully: ${error}`);
5095
+ if (analytics.isEnabled()) {
5096
+ analytics.track(TelemetryEvent.APP_SHUTDOWN, {
5097
+ graceful: false,
5098
+ error: error instanceof Error ? error.constructor.name : "UnknownError"
5099
+ });
5100
+ await telemetryService.shutdown();
5101
+ }
4037
5102
  throw error;
4038
5103
  }
4039
5104
  }
5105
+ /**
5106
+ * Setup global error handling for telemetry
5107
+ */
5108
+ setupErrorHandling() {
5109
+ if (!process.listenerCount("unhandledRejection")) {
5110
+ process.on("unhandledRejection", (reason) => {
5111
+ logger.error(`Unhandled Promise Rejection: ${reason}`);
5112
+ if (analytics.isEnabled()) {
5113
+ analytics.track(TelemetryEvent.ERROR_OCCURRED, {
5114
+ error_type: "UnhandledPromiseRejection",
5115
+ error_category: "system",
5116
+ component: "AppServer",
5117
+ severity: "critical",
5118
+ context: "process_unhandled_rejection"
5119
+ });
5120
+ }
5121
+ });
5122
+ }
5123
+ if (!process.listenerCount("uncaughtException")) {
5124
+ process.on("uncaughtException", (error) => {
5125
+ logger.error(`Uncaught Exception: ${error.message}`);
5126
+ if (analytics.isEnabled()) {
5127
+ analytics.track(TelemetryEvent.ERROR_OCCURRED, {
5128
+ error_type: error.constructor.name,
5129
+ error_category: "system",
5130
+ component: "AppServer",
5131
+ severity: "critical",
5132
+ context: "process_uncaught_exception"
5133
+ });
5134
+ }
5135
+ });
5136
+ }
5137
+ if (typeof this.server.setErrorHandler === "function") {
5138
+ this.server.setErrorHandler(async (error, request, reply) => {
5139
+ if (analytics.isEnabled()) {
5140
+ analytics.track(TelemetryEvent.ERROR_OCCURRED, {
5141
+ error_type: error.constructor.name,
5142
+ error_category: "http",
5143
+ component: "FastifyServer",
5144
+ severity: "high",
5145
+ status_code: error.statusCode || 500,
5146
+ method: request.method,
5147
+ route: request.routeOptions?.url || request.url,
5148
+ context: "http_request_error"
5149
+ });
5150
+ }
5151
+ logger.error(`HTTP Error on ${request.method} ${request.url}: ${error.message}`);
5152
+ const statusCode = error.statusCode || 500;
5153
+ reply.status(statusCode).send({
5154
+ error: "Internal Server Error",
5155
+ statusCode,
5156
+ message: statusCode < 500 ? error.message : "An unexpected error occurred"
5157
+ });
5158
+ });
5159
+ }
5160
+ }
5161
+ /**
5162
+ * Get list of currently active services for telemetry
5163
+ */
5164
+ getActiveServicesList() {
5165
+ const services = [];
5166
+ if (this.config.enableMcpServer) services.push("mcp");
5167
+ if (this.config.enableWebInterface) services.push("web");
5168
+ if (this.config.enableApiServer) services.push("api");
5169
+ if (this.config.enableWorker) services.push("worker");
5170
+ return services;
5171
+ }
4040
5172
  /**
4041
5173
  * Setup the server with plugins and conditionally enabled services.
4042
5174
  */
4043
5175
  async setupServer() {
5176
+ this.setupErrorHandling();
5177
+ if (this.config.auth?.enabled) {
5178
+ await this.initializeAuth();
5179
+ }
4044
5180
  await this.server.register(formBody);
5181
+ if (this.config.auth?.enabled) {
5182
+ this.server.addHook("onRequest", async (request) => {
5183
+ if (request.url.includes("/oauth") || request.url.includes("/auth") || request.url.includes("/register")) {
5184
+ logger.debug(
5185
+ `${request.method} ${request.url} - Headers: ${JSON.stringify(request.headers)}`
5186
+ );
5187
+ }
5188
+ });
5189
+ }
5190
+ if (this.config.auth?.enabled && this.authManager) {
5191
+ await this.setupAuthMetadataEndpoint();
5192
+ }
4045
5193
  if (this.config.enableWebInterface) {
4046
5194
  await this.enableWebInterface();
4047
5195
  }
@@ -4072,7 +5220,9 @@ class AppServer {
4072
5220
  this.mcpServer = await registerMcpService(
4073
5221
  this.server,
4074
5222
  this.docService,
4075
- this.pipeline
5223
+ this.pipeline,
5224
+ this.config.readOnly,
5225
+ this.authManager || void 0
4076
5226
  );
4077
5227
  logger.debug("MCP server service enabled");
4078
5228
  }
@@ -4100,6 +5250,28 @@ class AppServer {
4100
5250
  index: false
4101
5251
  });
4102
5252
  }
5253
+ /**
5254
+ * Initialize OAuth2/OIDC authentication manager.
5255
+ */
5256
+ async initializeAuth() {
5257
+ if (!this.config.auth) {
5258
+ return;
5259
+ }
5260
+ this.authManager = new ProxyAuthManager(this.config.auth);
5261
+ await this.authManager.initialize();
5262
+ logger.debug("Proxy auth manager initialized");
5263
+ }
5264
+ /**
5265
+ * Setup OAuth2 endpoints using ProxyAuthManager.
5266
+ */
5267
+ async setupAuthMetadataEndpoint() {
5268
+ if (!this.authManager) {
5269
+ return;
5270
+ }
5271
+ const baseUrl = new URL(`http://localhost:${this.config.port}`);
5272
+ this.authManager.registerRoutes(this.server, baseUrl);
5273
+ logger.debug("OAuth2 proxy endpoints registered");
5274
+ }
4103
5275
  /**
4104
5276
  * Log startup information showing which services are enabled.
4105
5277
  */
@@ -4130,9 +5302,9 @@ async function startAppServer(docService, pipeline, config) {
4130
5302
  await appServer.start();
4131
5303
  return appServer;
4132
5304
  }
4133
- async function startStdioServer(tools) {
5305
+ async function startStdioServer(tools, readOnly = false) {
4134
5306
  setLogLevel(LogLevel.ERROR);
4135
- const server = createMcpServerInstance(tools);
5307
+ const server = createMcpServerInstance(tools, readOnly);
4136
5308
  const transport = new StdioServerTransport();
4137
5309
  await server.connect(transport);
4138
5310
  logger.info("🤖 MCP server listening on stdio");
@@ -4191,10 +5363,10 @@ async function applyMigrations(db) {
4191
5363
  logger.debug("Checking database migrations...");
4192
5364
  ensureMigrationsTable(db);
4193
5365
  const appliedMigrations = getAppliedMigrations(db);
4194
- if (!fs$1.existsSync(MIGRATIONS_DIR)) {
5366
+ if (!fs.existsSync(MIGRATIONS_DIR)) {
4195
5367
  throw new StoreError("Migrations directory not found");
4196
5368
  }
4197
- const migrationFiles = fs$1.readdirSync(MIGRATIONS_DIR).filter((file) => file.endsWith(".sql")).sort();
5369
+ const migrationFiles = fs.readdirSync(MIGRATIONS_DIR).filter((file) => file.endsWith(".sql")).sort();
4198
5370
  const pendingMigrations = migrationFiles.filter(
4199
5371
  (filename) => !appliedMigrations.has(filename)
4200
5372
  );
@@ -4205,12 +5377,12 @@ async function applyMigrations(db) {
4205
5377
  for (const filename of pendingMigrations) {
4206
5378
  logger.debug(`Applying migration: ${filename}`);
4207
5379
  const filePath = path.join(MIGRATIONS_DIR, filename);
4208
- const sql = fs$1.readFileSync(filePath, "utf8");
5380
+ const sql = fs.readFileSync(filePath, "utf8");
4209
5381
  try {
4210
5382
  db.exec(sql);
4211
5383
  const insertStmt = db.prepare(`INSERT INTO ${MIGRATIONS_TABLE} (id) VALUES (?)`);
4212
5384
  insertStmt.run(filename);
4213
- logger.debug(`✅ Applied migration: ${filename}`);
5385
+ logger.debug(`Applied migration: ${filename}`);
4214
5386
  appliedCount++;
4215
5387
  } catch (error) {
4216
5388
  logger.error(`❌ Failed to apply migration: ${filename} - ${error}`);
@@ -4284,12 +5456,12 @@ async function createDocumentManagement(options = {}) {
4284
5456
  await client.initialize();
4285
5457
  return client;
4286
5458
  }
4287
- const service = new (await import("./DocumentManagementService-BH02TJEe.js")).DocumentManagementService();
5459
+ const service = new (await import("./DocumentManagementService-C1xAzouZ.js")).DocumentManagementService();
4288
5460
  await service.initialize();
4289
5461
  return service;
4290
5462
  }
4291
5463
  async function createLocalDocumentManagement() {
4292
- const service = new (await import("./DocumentManagementService-BH02TJEe.js")).DocumentManagementService();
5464
+ const service = new (await import("./DocumentManagementService-C1xAzouZ.js")).DocumentManagementService();
4293
5465
  await service.initialize();
4294
5466
  return service;
4295
5467
  }
@@ -4459,17 +5631,42 @@ function validateUrl(url) {
4459
5631
  throw new InvalidUrlError(url, error instanceof Error ? error : void 0);
4460
5632
  }
4461
5633
  }
4462
- function hasSameHostname(urlA, urlB) {
4463
- return urlA.hostname.toLowerCase() === urlB.hostname.toLowerCase();
5634
/**
 * Reduce a hostname to the domain used for "domain" scope comparisons.
 *
 * Dotted-quad IPv4 literals, strings made only of hex digits and colons, and
 * single-label names (no dot) are returned unchanged. Everything else is
 * lower-cased and passed to `psl.get`; when that yields nothing the original
 * hostname is returned.
 *
 * @param {string} hostname - Host name to reduce.
 * @returns {string} Result of `psl.get`, or the hostname itself.
 */
function extractPrimaryDomain(hostname) {
  const isIpv4Literal = /^\d+\.\d+\.\d+\.\d+$/.test(hostname);
  const isHexColonLiteral = /^[0-9a-fA-F:]+$/.test(hostname);
  if (isIpv4Literal || isHexColonLiteral || !hostname.includes(".")) {
    return hostname;
  }
  const registrableDomain = psl.get(hostname.toLowerCase());
  return registrableDomain ? registrableDomain : hostname;
}
4465
- function hasSameDomain(urlA, urlB) {
4466
- const domainA = psl.get(urlA.hostname.toLowerCase());
4467
- const domainB = psl.get(urlB.hostname.toLowerCase());
4468
- return domainA !== null && domainA === domainB;
5644
/**
 * Derive the directory prefix used for "subpages" scope checks.
 *
 * - An empty pathname maps to "/".
 * - A pathname ending in "/" is already a directory and is returned as is.
 * - When the last segment contains a dot it is treated as a file name and
 *   stripped, keeping everything up to and including the last "/".
 * - Otherwise the last segment is treated as a directory and "/" is appended.
 *
 * @param {string} pathname - URL pathname.
 * @returns {string} Directory-style prefix for the pathname.
 */
function computeBaseDirectory(pathname) {
  if (pathname === "" || pathname.endsWith("/")) {
    return pathname === "" ? "/" : pathname;
  }
  const lastSlashIndex = pathname.lastIndexOf("/");
  const finalSegment = pathname.slice(lastSlashIndex + 1);
  if (!finalSegment.includes(".")) {
    return `${pathname}/`;
  }
  // File-like segment: drop it. Without any "/" there is nothing to strip.
  return lastSlashIndex === -1 ? pathname : pathname.slice(0, lastSlashIndex + 1);
}
4470
- function isSubpath(baseUrl, targetUrl) {
4471
- const basePath = baseUrl.pathname.endsWith("/") ? baseUrl.pathname : `${baseUrl.pathname}/`;
4472
- return targetUrl.pathname.startsWith(basePath);
5654
/**
 * Decide whether a target URL lies within the crawl scope of a base URL.
 *
 * URLs with different protocols are never in scope. Beyond that:
 * - "subpages": same hostname, and the target path is either the base path
 *   itself or starts with the base directory (see computeBaseDirectory).
 * - "hostname": same hostname.
 * - "domain": same primary domain (see extractPrimaryDomain).
 * Any other scope value yields false.
 *
 * @param {URL} baseUrl - URL the crawl started from.
 * @param {URL} targetUrl - Candidate URL.
 * @param {string} scope - "subpages", "hostname" or "domain".
 * @returns {boolean} True when the target is inside the scope.
 */
function isInScope(baseUrl, targetUrl, scope) {
  if (baseUrl.protocol !== targetUrl.protocol) return false;
  switch (scope) {
    case "subpages": {
      if (baseUrl.hostname !== targetUrl.hostname) return false;
      // A base such as "/docs" becomes the directory "/docs/", which the path
      // "/docs" itself does not start with; accept the base path explicitly.
      if (targetUrl.pathname === baseUrl.pathname) return true;
      const baseDir = computeBaseDirectory(baseUrl.pathname);
      return targetUrl.pathname.startsWith(baseDir);
    }
    case "hostname":
      return baseUrl.hostname === targetUrl.hostname;
    case "domain":
      return extractPrimaryDomain(baseUrl.hostname) === extractPrimaryDomain(targetUrl.hostname);
    default:
      return false;
  }
}
4474
5671
  function isRegexPattern(pattern) {
4475
5672
  return pattern.length > 2 && pattern.startsWith("/") && pattern.endsWith("/");
@@ -4512,29 +5709,12 @@ function shouldIncludeUrl(url, includePatterns, excludePatterns) {
4512
5709
  }
4513
5710
  }
4514
5711
  const stripSlash = (patterns) => patterns?.map((p) => p.startsWith("/") ? p.slice(1) : p);
4515
- if (matchesAnyPattern(normalizedPath, excludePatterns) || basename && matchesAnyPattern(basename, stripSlash(excludePatterns)))
5712
+ const effectiveExcludePatterns = getEffectiveExclusionPatterns(excludePatterns);
5713
+ if (matchesAnyPattern(normalizedPath, effectiveExcludePatterns) || basename && matchesAnyPattern(basename, stripSlash(effectiveExcludePatterns)))
4516
5714
  return false;
4517
5715
  if (!includePatterns || includePatterns.length === 0) return true;
4518
5716
  return matchesAnyPattern(normalizedPath, includePatterns) || (basename ? matchesAnyPattern(basename, stripSlash(includePatterns)) : false);
4519
5717
  }
4520
- function isInScope(baseUrl, targetUrl, scope) {
4521
- if (baseUrl.protocol !== targetUrl.protocol) return false;
4522
- switch (scope) {
4523
- case "subpages": {
4524
- if (baseUrl.hostname !== targetUrl.hostname) return false;
4525
- const baseDir = baseUrl.pathname.endsWith("/") ? baseUrl.pathname : baseUrl.pathname.replace(/\/[^/]*$/, "/");
4526
- return targetUrl.pathname.startsWith(baseDir);
4527
- }
4528
- case "hostname":
4529
- return baseUrl.hostname === targetUrl.hostname;
4530
- case "domain": {
4531
- const getDomain = (host) => host.split(".").slice(-2).join(".");
4532
- return getDomain(baseUrl.hostname) === getDomain(targetUrl.hostname);
4533
- }
4534
- default:
4535
- return false;
4536
- }
4537
- }
4538
5718
  const DEFAULT_MAX_DEPTH = 3;
4539
5719
  const DEFAULT_CONCURRENCY = 3;
4540
5720
  class BaseScraperStrategy {
@@ -4543,6 +5723,8 @@ class BaseScraperStrategy {
4543
5723
  totalDiscovered = 0;
4544
5724
  // Track total URLs discovered (unlimited)
4545
5725
  effectiveTotal = 0;
5726
+ // Track effective total (limited by maxPages)
5727
+ canonicalBaseUrl;
4546
5728
  options;
4547
5729
  constructor(options = {}) {
4548
5730
  this.options = options;
@@ -4554,7 +5736,7 @@ class BaseScraperStrategy {
4554
5736
  shouldProcessUrl(url, options) {
4555
5737
  if (options.scope) {
4556
5738
  try {
4557
- const base = new URL$1(options.url);
5739
+ const base = this.canonicalBaseUrl ?? new URL$1(options.url);
4558
5740
  const target = new URL$1(url);
4559
5741
  if (!isInScope(base, target, options.scope)) return false;
4560
5742
  } catch {
@@ -4577,6 +5759,23 @@ class BaseScraperStrategy {
4577
5759
  }
4578
5760
  try {
4579
5761
  const result = await this.processItem(item, options, void 0, signal);
5762
+ if (item.depth === 0 && !this.canonicalBaseUrl && result?.finalUrl) {
5763
+ try {
5764
+ const finalUrlStr = result.finalUrl;
5765
+ const original = new URL$1(options.url);
5766
+ const finalUrlObj = new URL$1(finalUrlStr);
5767
+ if (finalUrlObj.href !== original.href && (finalUrlObj.protocol === "http:" || finalUrlObj.protocol === "https:")) {
5768
+ this.canonicalBaseUrl = finalUrlObj;
5769
+ logger.debug(
5770
+ `Updated scope base after redirect: ${original.href} -> ${finalUrlObj.href}`
5771
+ );
5772
+ } else {
5773
+ this.canonicalBaseUrl = original;
5774
+ }
5775
+ } catch {
5776
+ this.canonicalBaseUrl = new URL$1(options.url);
5777
+ }
5778
+ }
4580
5779
  if (result.document) {
4581
5780
  this.pageCount++;
4582
5781
  logger.info(
@@ -4637,7 +5836,8 @@ class BaseScraperStrategy {
4637
5836
  this.pageCount = 0;
4638
5837
  this.totalDiscovered = 1;
4639
5838
  this.effectiveTotal = 1;
4640
- const baseUrl = new URL$1(options.url);
5839
+ this.canonicalBaseUrl = new URL$1(options.url);
5840
+ let baseUrl = this.canonicalBaseUrl;
4641
5841
  const queue = [{ url: options.url, depth: 0 }];
4642
5842
  this.visited.add(normalizeUrl(options.url, this.options.urlNormalizerOptions));
4643
5843
  const maxPages = options.maxPages ?? DEFAULT_MAX_PAGES;
@@ -4658,6 +5858,7 @@ class BaseScraperStrategy {
4658
5858
  queue.length
4659
5859
  );
4660
5860
  const batch = queue.splice(0, batchSize);
5861
+ baseUrl = this.canonicalBaseUrl ?? baseUrl;
4661
5862
  const newUrls = await this.processBatch(
4662
5863
  batch,
4663
5864
  baseUrl,
@@ -4690,22 +5891,7 @@ class WebScraperStrategy extends BaseScraperStrategy {
4690
5891
  return false;
4691
5892
  }
4692
5893
  }
4693
- /**
4694
- * Determines if a target URL should be followed based on the scope setting.
4695
- */
4696
- isInScope(baseUrl, targetUrl, scope) {
4697
- try {
4698
- if (scope === "domain") {
4699
- return hasSameDomain(baseUrl, targetUrl);
4700
- }
4701
- if (scope === "hostname") {
4702
- return hasSameHostname(baseUrl, targetUrl);
4703
- }
4704
- return hasSameHostname(baseUrl, targetUrl) && isSubpath(baseUrl, targetUrl);
4705
- } catch {
4706
- return false;
4707
- }
4708
- }
5894
+ // Removed custom isInScope logic; using shared scope utility for consistent behavior
4709
5895
  /**
4710
5896
  * Processes a single queue item by fetching its content and processing it through pipelines.
4711
5897
  * @param item - The queue item to process.
@@ -4746,12 +5932,12 @@ class WebScraperStrategy extends BaseScraperStrategy {
4746
5932
  );
4747
5933
  return { document: void 0, links: processed.links };
4748
5934
  }
4749
- const baseUrl = new URL(options.url);
5935
+ const baseUrl = item.depth === 0 ? new URL(rawContent.source) : this.canonicalBaseUrl ?? new URL(options.url);
4750
5936
  const filteredLinks = processed.links.filter((link) => {
4751
5937
  try {
4752
5938
  const targetUrl = new URL(link);
4753
5939
  const scope = options.scope || "subpages";
4754
- return this.isInScope(baseUrl, targetUrl, scope) && (!this.shouldFollowLinkFn || this.shouldFollowLinkFn(baseUrl, targetUrl));
5940
+ return isInScope(baseUrl, targetUrl, scope) && (!this.shouldFollowLinkFn || this.shouldFollowLinkFn(baseUrl, targetUrl));
4755
5941
  } catch {
4756
5942
  return false;
4757
5943
  }
@@ -4767,7 +5953,8 @@ class WebScraperStrategy extends BaseScraperStrategy {
4767
5953
  ...processed.metadata
4768
5954
  }
4769
5955
  },
4770
- links: filteredLinks
5956
+ links: filteredLinks,
5957
+ finalUrl: rawContent.source
4771
5958
  };
4772
5959
  } catch (error) {
4773
5960
  logger.error(`❌ Failed processing page ${url}: ${error}`);
@@ -4849,9 +6036,9 @@ class LocalFileStrategy extends BaseScraperStrategy {
4849
6036
  }
4850
6037
  async processItem(item, options, _progressCallback, _signal) {
4851
6038
  const filePath = decodeURIComponent(item.url.replace(/^file:\/\//, ""));
4852
- const stats = await fs.stat(filePath);
6039
+ const stats = await fs$1.stat(filePath);
4853
6040
  if (stats.isDirectory()) {
4854
- const contents = await fs.readdir(filePath);
6041
+ const contents = await fs$1.readdir(filePath);
4855
6042
  const links = contents.map((name) => `file://${path.join(filePath, name)}`).filter((url) => this.shouldProcessUrl(url, options));
4856
6043
  return { links };
4857
6044
  }
@@ -5682,11 +6869,11 @@ async function createPipelineWithCallbacks(docService, options = {}) {
5682
6869
  pipeline.setCallbacks({
5683
6870
  onJobProgress: async (job, progress) => {
5684
6871
  logger.debug(
5685
- `📊 Job ${job.id} progress: ${progress.pagesScraped}/${progress.totalPages} pages`
6872
+ `Job ${job.id} progress: ${progress.pagesScraped}/${progress.totalPages} pages`
5686
6873
  );
5687
6874
  },
5688
6875
  onJobStatusChange: async (job) => {
5689
- logger.debug(`🔄 Job ${job.id} status changed to: ${job.status}`);
6876
+ logger.debug(`Job ${job.id} status changed to: ${job.status}`);
5690
6877
  },
5691
6878
  onJobError: async (job, error, document) => {
5692
6879
  logger.warn(
@@ -5703,7 +6890,9 @@ function createAppServerConfig(options) {
5703
6890
  enableApiServer: options.enableApiServer ?? false,
5704
6891
  enableWorker: options.enableWorker ?? true,
5705
6892
  port: options.port,
5706
- externalWorkerUrl: options.externalWorkerUrl
6893
+ externalWorkerUrl: options.externalWorkerUrl,
6894
+ readOnly: options.readOnly ?? false,
6895
+ auth: options.auth
5707
6896
  };
5708
6897
  }
5709
6898
  function parseHeaders(headerOptions) {
@@ -5724,8 +6913,84 @@ const CLI_DEFAULTS = {
5724
6913
  PROTOCOL: DEFAULT_PROTOCOL,
5725
6914
  HTTP_PORT: DEFAULT_HTTP_PORT,
5726
6915
  WEB_PORT: DEFAULT_WEB_PORT,
5727
- MAX_CONCURRENCY: DEFAULT_MAX_CONCURRENCY
6916
+ MAX_CONCURRENCY: DEFAULT_MAX_CONCURRENCY,
6917
+ TELEMETRY: true
5728
6918
  };
6919
/**
 * Build the auth configuration from CLI options and environment variables.
 *
 * Auth is enabled when the CLI flag is set OR `DOCS_MCP_AUTH_ENABLED` equals
 * "true" (case-insensitive). A nullish-coalescing fallback is not enough
 * here: the `--auth-enabled` option defaults to `false`, which is not
 * nullish, so the environment variable would never be consulted.
 *
 * The issuer URL and audience come from the CLI options, falling back to
 * `DOCS_MCP_AUTH_ISSUER_URL` / `DOCS_MCP_AUTH_AUDIENCE`.
 *
 * @param {{authEnabled?: boolean, authIssuerUrl?: string, authAudience?: string}} options
 * @returns {{enabled: boolean, issuerUrl: (string|undefined), audience: (string|undefined), scopes: string[]} | undefined}
 *   The auth configuration, or `undefined` when auth is disabled.
 */
function parseAuthConfig(options) {
  const enabledByEnv = process.env.DOCS_MCP_AUTH_ENABLED?.toLowerCase() === "true";
  const enabled = Boolean(options.authEnabled) || enabledByEnv;
  if (!enabled) {
    return void 0;
  }
  const issuerUrl = options.authIssuerUrl ?? process.env.DOCS_MCP_AUTH_ISSUER_URL;
  const audience = options.authAudience ?? process.env.DOCS_MCP_AUTH_AUDIENCE;
  return {
    enabled,
    issuerUrl,
    audience,
    // Default scopes for OAuth2/OIDC
    scopes: ["openid", "profile"]
  };
}
6934
/**
 * Validate an auth configuration, collecting every problem before failing.
 *
 * Checks performed when auth is enabled:
 * - the issuer URL is present, parses as a URL and uses HTTPS;
 * - the audience is present and is either a URN of the form
 *   `urn:namespace:specific-string` or an absolute URL, in both cases without
 *   a fragment. A plain-HTTP audience on a non-localhost host only triggers a
 *   warning.
 *
 * URNs are checked before URL parsing because the URL parser accepts `urn:`
 * strings; relying on a parse failure would leave the URN format check
 * unreachable.
 *
 * @param {{enabled: boolean, issuerUrl?: string, audience?: string}} authConfig
 * @throws {Error} When one or more checks fail; the message lists all of them.
 */
function validateAuthConfig(authConfig) {
  if (!authConfig.enabled) {
    return;
  }
  const errors = [];
  if (!authConfig.issuerUrl) {
    errors.push("--auth-issuer-url is required when auth is enabled");
  } else {
    try {
      const url = new URL(authConfig.issuerUrl);
      if (url.protocol !== "https:") {
        errors.push("Issuer URL must use HTTPS protocol");
      }
    } catch {
      errors.push("Issuer URL must be a valid URL");
    }
  }
  const { audience } = authConfig;
  if (!audience) {
    errors.push("--auth-audience is required when auth is enabled");
  } else if (/^urn:/i.test(audience)) {
    const [, namespace, specific] = audience.split(":");
    if (!namespace || !specific) {
      errors.push("URN audience must follow format: urn:namespace:specific-string");
    }
    if (audience.includes("#")) {
      errors.push("Audience must not contain URL fragments");
    }
  } else {
    try {
      const url = new URL(audience);
      if (url.protocol === "http:" && url.hostname !== "localhost") {
        logger.warn(
          "⚠️ Audience uses HTTP protocol - consider using HTTPS for production"
        );
      }
      if (url.hash) {
        errors.push("Audience must not contain URL fragments");
      }
    } catch {
      errors.push(
        "Audience must be a valid absolute URL or URN (e.g., https://api.example.com or urn:company:service)"
      );
    }
  }
  if (errors.length > 0) {
    throw new Error(`Auth configuration validation failed:\n${errors.join("\n")}`);
  }
}
6982
/**
 * Warn when authentication appears to run over plain HTTP in production.
 *
 * Nothing is logged when auth is disabled, or when any of the following
 * holds: NODE_ENV is not "production", the port is 6280 (default dev port),
 * or the HOSTNAME environment variable contains "localhost".
 *
 * @param {{enabled?: boolean} | undefined} authConfig - Auth configuration.
 * @param {number} port - Port the server listens on.
 */
function warnHttpUsage(authConfig, port) {
  if (!authConfig?.enabled) {
    return;
  }
  const outsideProduction = process.env.NODE_ENV !== "production";
  const onDefaultDevPort = port === 6280;
  const hostnameLooksLocal = process.env.HOSTNAME?.includes("localhost");
  if (outsideProduction || onDefaultDevPort || hostnameLooksLocal) {
    return;
  }
  logger.warn(
    "⚠️ Authentication is enabled but running over HTTP in production. Consider using HTTPS for security."
  );
}
5729
6994
  function createDefaultAction(program) {
5730
6995
  return program.addOption(
5731
6996
  new Option("--protocol <protocol>", "Protocol for MCP server").choices(["auto", "stdio", "http"]).default("auto")
@@ -5737,13 +7002,33 @@ function createDefaultAction(program) {
5737
7002
  }
5738
7003
  return String(n);
5739
7004
  }).default(CLI_DEFAULTS.HTTP_PORT.toString())
5740
- ).option("--resume", "Resume interrupted jobs on startup", false).option("--no-resume", "Do not resume jobs on startup").action(
7005
+ ).option("--resume", "Resume interrupted jobs on startup", false).option("--no-resume", "Do not resume jobs on startup").option(
7006
+ "--read-only",
7007
+ "Run in read-only mode (only expose read tools, disable write/job tools)",
7008
+ false
7009
+ ).option(
7010
+ "--auth-enabled",
7011
+ "Enable OAuth2/OIDC authentication for MCP endpoints",
7012
+ false
7013
+ ).option("--auth-issuer-url <url>", "Issuer/discovery URL for OAuth2/OIDC provider").option(
7014
+ "--auth-audience <id>",
7015
+ "JWT audience claim (identifies this protected resource)"
7016
+ ).action(
5741
7017
  async (options, command) => {
5742
7018
  const globalOptions = command.opts();
5743
7019
  const resolvedProtocol = resolveProtocol(options.protocol);
5744
7020
  setupLogging(globalOptions, resolvedProtocol);
5745
7021
  logger.debug("No subcommand specified, starting unified server by default...");
5746
7022
  const port = validatePort(options.port);
7023
+ const authConfig = parseAuthConfig({
7024
+ authEnabled: options.authEnabled,
7025
+ authIssuerUrl: options.authIssuerUrl,
7026
+ authAudience: options.authAudience
7027
+ });
7028
+ if (authConfig) {
7029
+ validateAuthConfig(authConfig);
7030
+ warnHttpUsage(authConfig, port);
7031
+ }
5747
7032
  ensurePlaywrightBrowsersInstalled();
5748
7033
  const docService = await createLocalDocumentManagement();
5749
7034
  const pipelineOptions = {
@@ -5753,14 +7038,14 @@ function createDefaultAction(program) {
5753
7038
  };
5754
7039
  const pipeline = await createPipelineWithCallbacks(docService, pipelineOptions);
5755
7040
  if (resolvedProtocol === "stdio") {
5756
- logger.debug(`🔍 Auto-detected stdio protocol (no TTY)`);
7041
+ logger.debug(`Auto-detected stdio protocol (no TTY)`);
5757
7042
  await pipeline.start();
5758
7043
  const mcpTools = await initializeTools(docService, pipeline);
5759
- await startStdioServer(mcpTools);
7044
+ await startStdioServer(mcpTools, options.readOnly);
5760
7045
  await new Promise(() => {
5761
7046
  });
5762
7047
  } else {
5763
- logger.debug(`🔍 Auto-detected http protocol (TTY available)`);
7048
+ logger.debug(`Auto-detected http protocol (TTY available)`);
5764
7049
  const config = createAppServerConfig({
5765
7050
  enableWebInterface: true,
5766
7051
  // Enable web interface in http mode
@@ -5770,7 +7055,9 @@ function createDefaultAction(program) {
5770
7055
  // Enable API (tRPC) in http mode
5771
7056
  enableWorker: true,
5772
7057
  // Always enable in-process worker for unified server
5773
- port
7058
+ port,
7059
+ readOnly: options.readOnly,
7060
+ auth: authConfig
5774
7061
  });
5775
7062
  await startAppServer(docService, pipeline, config);
5776
7063
  await new Promise(() => {
@@ -5784,12 +7071,24 @@ async function fetchUrlAction(url, options, command) {
5784
7071
  setupLogging(globalOptions);
5785
7072
  const headers = parseHeaders(options.header);
5786
7073
  const fetchUrlTool = new FetchUrlTool(new HttpFetcher(), new FileFetcher());
5787
- const content = await fetchUrlTool.execute({
5788
- url,
5789
- followRedirects: options.followRedirects,
5790
- scrapeMode: options.scrapeMode,
5791
- headers: Object.keys(headers).length > 0 ? headers : void 0
5792
- });
7074
+ const content = await trackTool(
7075
+ "fetch_url",
7076
+ () => fetchUrlTool.execute({
7077
+ url,
7078
+ followRedirects: options.followRedirects,
7079
+ scrapeMode: options.scrapeMode,
7080
+ headers: Object.keys(headers).length > 0 ? headers : void 0
7081
+ }),
7082
+ (content2) => ({
7083
+ url_protocol: extractProtocol(url),
7084
+ // Safe: only protocol, not full URL
7085
+ follow_redirects: options.followRedirects,
7086
+ scrape_mode: options.scrapeMode,
7087
+ has_custom_headers: Object.keys(headers).length > 0,
7088
+ content_length: content2.length,
7089
+ cli_flags: extractCliFlags(process.argv)
7090
+ })
7091
+ );
5793
7092
  console.log(content);
5794
7093
  }
5795
7094
  function createFetchUrlCommand(program) {
@@ -5824,10 +7123,22 @@ async function findVersionAction(library, options, command) {
5824
7123
  const docService = await createDocumentManagement({ serverUrl });
5825
7124
  try {
5826
7125
  const findVersionTool = new FindVersionTool(docService);
5827
- const versionInfo = await findVersionTool.execute({
5828
- library,
5829
- targetVersion: options.version
5830
- });
7126
+ const versionInfo = await trackTool(
7127
+ "find_version",
7128
+ () => findVersionTool.execute({
7129
+ library,
7130
+ targetVersion: options.version
7131
+ }),
7132
+ (versionInfo2) => ({
7133
+ library,
7134
+ // Safe: library names are public
7135
+ has_target_version: !!options.version,
7136
+ result_type: typeof versionInfo2,
7137
+ // 'string'
7138
+ using_remote_server: !!serverUrl,
7139
+ cli_flags: extractCliFlags(process.argv)
7140
+ })
7141
+ );
5831
7142
  if (!versionInfo) throw new Error("Failed to get version information");
5832
7143
  console.log(versionInfo);
5833
7144
  } finally {
@@ -5847,7 +7158,15 @@ async function listAction(options, command) {
5847
7158
  const docService = await createDocumentManagement({ serverUrl });
5848
7159
  try {
5849
7160
  const listLibrariesTool = new ListLibrariesTool(docService);
5850
- const result = await listLibrariesTool.execute();
7161
+ const result = await trackTool(
7162
+ "list_libraries",
7163
+ () => listLibrariesTool.execute(),
7164
+ (result2) => ({
7165
+ library_count: result2.libraries.length,
7166
+ using_remote_server: !!serverUrl,
7167
+ cli_flags: extractCliFlags(process.argv)
7168
+ })
7169
+ );
5851
7170
  console.log(formatOutput(result.libraries));
5852
7171
  } finally {
5853
7172
  await docService.shutdown();
@@ -5873,6 +7192,17 @@ function createMcpCommand(program) {
5873
7192
  ).option(
5874
7193
  "--server-url <url>",
5875
7194
  "URL of external pipeline worker RPC (e.g., http://localhost:6280/api)"
7195
+ ).option(
7196
+ "--read-only",
7197
+ "Run in read-only mode (only expose read tools, disable write/job tools)",
7198
+ false
7199
+ ).option(
7200
+ "--auth-enabled",
7201
+ "Enable OAuth2/OIDC authentication for MCP endpoints",
7202
+ false
7203
+ ).option("--auth-issuer-url <url>", "Issuer/discovery URL for OAuth2/OIDC provider").option(
7204
+ "--auth-audience <id>",
7205
+ "JWT audience claim (identifies this protected resource)"
5876
7206
  ).action(
5877
7207
  async (cmdOptions, command) => {
5878
7208
  const globalOptions = command.parent?.opts() || {};
@@ -5880,6 +7210,14 @@ function createMcpCommand(program) {
5880
7210
  const serverUrl = cmdOptions.serverUrl;
5881
7211
  const resolvedProtocol = resolveProtocol(cmdOptions.protocol);
5882
7212
  setupLogging(globalOptions, resolvedProtocol);
7213
+ const authConfig = parseAuthConfig({
7214
+ authEnabled: cmdOptions.authEnabled,
7215
+ authIssuerUrl: cmdOptions.authIssuerUrl,
7216
+ authAudience: cmdOptions.authAudience
7217
+ });
7218
+ if (authConfig) {
7219
+ validateAuthConfig(authConfig);
7220
+ }
5883
7221
  try {
5884
7222
  const docService = await createDocumentManagement({
5885
7223
  serverUrl
@@ -5895,15 +7233,15 @@ function createMcpCommand(program) {
5895
7233
  pipelineOptions
5896
7234
  );
5897
7235
  if (resolvedProtocol === "stdio") {
5898
- logger.debug(`🔍 Auto-detected stdio protocol (no TTY)`);
7236
+ logger.debug(`Auto-detected stdio protocol (no TTY)`);
5899
7237
  logger.info("🚀 Starting MCP server (stdio mode)");
5900
7238
  await pipeline.start();
5901
7239
  const mcpTools = await initializeTools(docService, pipeline);
5902
- await startStdioServer(mcpTools);
7240
+ await startStdioServer(mcpTools, cmdOptions.readOnly);
5903
7241
  await new Promise(() => {
5904
7242
  });
5905
7243
  } else {
5906
- logger.debug(`🔍 Auto-detected http protocol (TTY available)`);
7244
+ logger.debug(`Auto-detected http protocol (TTY available)`);
5907
7245
  logger.info("🚀 Starting MCP server (http mode)");
5908
7246
  const config = createAppServerConfig({
5909
7247
  enableWebInterface: false,
@@ -5913,7 +7251,9 @@ function createMcpCommand(program) {
5913
7251
  // Never enable API in mcp command
5914
7252
  enableWorker: !serverUrl,
5915
7253
  port,
5916
- externalWorkerUrl: serverUrl
7254
+ externalWorkerUrl: serverUrl,
7255
+ readOnly: cmdOptions.readOnly,
7256
+ auth: authConfig
5917
7257
  });
5918
7258
  await startAppServer(docService, pipeline, config);
5919
7259
  await new Promise(() => {
@@ -5933,13 +7273,21 @@ async function removeAction(library, options, command) {
5933
7273
  const docService = await createDocumentManagement({ serverUrl });
5934
7274
  const { version: version2 } = options;
5935
7275
  try {
5936
- await docService.removeAllDocuments(library, version2);
5937
- console.log(
5938
- `✅ Successfully removed documents for ${library}${version2 ? `@${version2}` : " (unversioned)"}.`
7276
+ await trackTool(
7277
+ "remove_documents",
7278
+ () => docService.removeAllDocuments(library, version2),
7279
+ () => ({
7280
+ library,
7281
+ // Safe: library names are public
7282
+ has_version: !!version2,
7283
+ using_remote_server: !!serverUrl,
7284
+ cli_flags: extractCliFlags(process.argv)
7285
+ })
5939
7286
  );
7287
+ console.log(`✅ Successfully removed ${library}${version2 ? `@${version2}` : ""}.`);
5940
7288
  } catch (error) {
5941
7289
  console.error(
5942
- `❌ Failed to remove documents for ${library}${version2 ? `@${version2}` : " (unversioned)"}:`,
7290
+ `❌ Failed to remove ${library}${version2 ? `@${version2}` : ""}:`,
5943
7291
  error instanceof Error ? error.message : String(error)
5944
7292
  );
5945
7293
  throw error;
@@ -5975,23 +7323,48 @@ async function scrapeAction(library, url, options, command) {
5975
7323
  await pipeline.start();
5976
7324
  const scrapeTool = new ScrapeTool(pipeline);
5977
7325
  const headers = parseHeaders(options.header);
5978
- const result = await scrapeTool.execute({
5979
- url,
5980
- library,
5981
- version: options.version,
5982
- options: {
5983
- maxPages: Number.parseInt(options.maxPages),
5984
- maxDepth: Number.parseInt(options.maxDepth),
5985
- maxConcurrency: Number.parseInt(options.maxConcurrency),
5986
- ignoreErrors: options.ignoreErrors,
7326
+ const result = await trackTool(
7327
+ "scrape_docs",
7328
+ () => scrapeTool.execute({
7329
+ url,
7330
+ library,
7331
+ version: options.version,
7332
+ options: {
7333
+ maxPages: Number.parseInt(options.maxPages, 10),
7334
+ maxDepth: Number.parseInt(options.maxDepth, 10),
7335
+ maxConcurrency: Number.parseInt(options.maxConcurrency, 10),
7336
+ ignoreErrors: options.ignoreErrors,
7337
+ scope: options.scope,
7338
+ followRedirects: options.followRedirects,
7339
+ scrapeMode: options.scrapeMode,
7340
+ includePatterns: Array.isArray(options.includePattern) && options.includePattern.length > 0 ? options.includePattern : void 0,
7341
+ excludePatterns: Array.isArray(options.excludePattern) && options.excludePattern.length > 0 ? options.excludePattern : void 0,
7342
+ headers: Object.keys(headers).length > 0 ? headers : void 0
7343
+ }
7344
+ }),
7345
+ (result2) => ({
7346
+ library,
7347
+ // Safe: library names are public
7348
+ url_protocol: extractProtocol(url),
7349
+ // Safe: only protocol, not full URL
7350
+ max_pages: Number.parseInt(options.maxPages, 10),
7351
+ max_depth: Number.parseInt(options.maxDepth, 10),
7352
+ max_concurrency: Number.parseInt(options.maxConcurrency, 10),
7353
+ has_version: !!options.version,
5987
7354
  scope: options.scope,
5988
- followRedirects: options.followRedirects,
5989
- scrapeMode: options.scrapeMode,
5990
- includePatterns: Array.isArray(options.includePattern) && options.includePattern.length > 0 ? options.includePattern : void 0,
5991
- excludePatterns: Array.isArray(options.excludePattern) && options.excludePattern.length > 0 ? options.excludePattern : void 0,
5992
- headers: Object.keys(headers).length > 0 ? headers : void 0
5993
- }
5994
- });
7355
+ scrape_mode: options.scrapeMode,
7356
+ ignore_errors: options.ignoreErrors,
7357
+ follow_redirects: options.followRedirects,
7358
+ has_include_patterns: Array.isArray(options.includePattern) && options.includePattern.length > 0,
7359
+ has_exclude_patterns: Array.isArray(options.excludePattern) && options.excludePattern.length > 0,
7360
+ has_custom_headers: Object.keys(headers).length > 0,
7361
+ using_remote_server: !!serverUrl,
7362
+ cli_flags: extractCliFlags(process.argv),
7363
+ is_async_job: !("pagesScraped" in result2),
7364
+ // Pipeline mode vs direct mode
7365
+ pages_scraped: "pagesScraped" in result2 ? result2.pagesScraped : void 0
7366
+ })
7367
+ );
5995
7368
  if ("pagesScraped" in result) {
5996
7369
  console.log(`✅ Successfully scraped ${result.pagesScraped} pages`);
5997
7370
  } else {
@@ -6073,13 +7446,28 @@ async function searchAction(library, query, options, command) {
6073
7446
  const docService = await createDocumentManagement({ serverUrl });
6074
7447
  try {
6075
7448
  const searchTool = new SearchTool(docService);
6076
- const result = await searchTool.execute({
6077
- library,
6078
- version: options.version,
6079
- query,
6080
- limit: Number.parseInt(options.limit),
6081
- exactMatch: options.exactMatch
6082
- });
7449
+ const result = await trackTool(
7450
+ "search_docs",
7451
+ () => searchTool.execute({
7452
+ library,
7453
+ version: options.version,
7454
+ query,
7455
+ limit: Number.parseInt(options.limit, 10),
7456
+ exactMatch: options.exactMatch
7457
+ }),
7458
+ (result2) => ({
7459
+ library,
7460
+ // Safe: library names are public
7461
+ query_analysis: analyzeSearchQuery(query),
7462
+ // Analyzed, not raw query
7463
+ result_count: result2.results.length,
7464
+ limit_used: Number.parseInt(options.limit, 10),
7465
+ has_version_filter: !!options.version,
7466
+ exact_match: options.exactMatch,
7467
+ using_remote_server: !!serverUrl,
7468
+ cli_flags: extractCliFlags(process.argv)
7469
+ })
7470
+ );
6083
7471
  console.log(formatOutput(result.results));
6084
7472
  } finally {
6085
7473
  await docService.shutdown();
@@ -6192,11 +7580,28 @@ function createCliProgram() {
6192
7580
  const program = new Command();
6193
7581
  program.name("docs-mcp-server").description("Unified CLI, MCP Server, and Web Interface for Docs MCP Server.").version(packageJson.version).addOption(
6194
7582
  new Option("--verbose", "Enable verbose (debug) logging").conflicts("silent")
6195
- ).addOption(new Option("--silent", "Disable all logging except errors")).enablePositionalOptions().allowExcessArguments(false).showHelpAfterError(true);
6196
- program.hook("preAction", (thisCommand, _actionCommand) => {
7583
+ ).addOption(new Option("--silent", "Disable all logging except errors")).addOption(new Option("--no-telemetry", "Disable telemetry collection")).enablePositionalOptions().allowExcessArguments(false).showHelpAfterError(true);
7584
+ program.hook("preAction", (thisCommand, actionCommand) => {
6197
7585
  const globalOptions = thisCommand.opts();
6198
7586
  if (globalOptions.silent) setLogLevel(LogLevel.ERROR);
6199
7587
  else if (globalOptions.verbose) setLogLevel(LogLevel.DEBUG);
7588
+ if (shouldEnableTelemetry()) {
7589
+ const commandName = actionCommand.name();
7590
+ const session = createCliSession(commandName, {
7591
+ authEnabled: false,
7592
+ // CLI doesn't use auth
7593
+ readOnly: false
7594
+ });
7595
+ analytics.startSession(session);
7596
+ } else {
7597
+ TelemetryConfig.getInstance().disable();
7598
+ }
7599
+ });
7600
+ program.hook("postAction", async () => {
7601
+ if (analytics.isEnabled()) {
7602
+ analytics.endSession();
7603
+ await analytics.shutdown();
7604
+ }
6200
7605
  });
6201
7606
  createMcpCommand(program);
6202
7607
  createWebCommand(program);
@@ -6244,6 +7649,10 @@ const sigintHandler = async () => {
6244
7649
  activeDocService = null;
6245
7650
  logger.debug("SIGINT: DocumentManagementService shut down.");
6246
7651
  }
7652
+ if (analytics.isEnabled()) {
7653
+ await analytics.shutdown();
7654
+ logger.debug("SIGINT: Analytics shut down.");
7655
+ }
6247
7656
  logger.info("✅ Graceful shutdown completed");
6248
7657
  process.exit(0);
6249
7658
  } catch (error) {
@@ -6319,6 +7728,7 @@ export {
6319
7728
  EMBEDDING_BATCH_CHARS as E,
6320
7729
  LibraryNotFoundError as L,
6321
7730
  StoreError as S,
7731
+ TelemetryEvent as T,
6322
7732
  VECTOR_DIMENSION as V,
6323
7733
  applyMigrations as a,
6324
7734
  EMBEDDING_BATCH_SIZE as b,
@@ -6328,7 +7738,9 @@ export {
6328
7738
  SPLITTER_MAX_CHUNK_SIZE as f,
6329
7739
  getProjectRoot as g,
6330
7740
  VersionNotFoundError as h,
6331
- SPLITTER_MIN_CHUNK_SIZE as i,
7741
+ analytics as i,
7742
+ extractHostname as j,
7743
+ SPLITTER_MIN_CHUNK_SIZE as k,
6332
7744
  logger as l,
6333
7745
  mapDbDocumentToDocument as m,
6334
7746
  normalizeVersionName as n