@arabold/docs-mcp-server 1.20.0 → 1.21.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.js CHANGED
@@ -1,9 +1,16 @@
1
+ #!/usr/bin/env node
1
2
  import "dotenv/config";
2
- import { Option, Command } from "commander";
3
+ import { PostHog } from "posthog-node";
4
+ import crypto, { randomUUID } from "node:crypto";
5
+ import fs, { readFileSync, existsSync } from "node:fs";
3
6
  import path from "node:path";
7
+ import envPaths from "env-paths";
8
+ import { Option, Command } from "commander";
4
9
  import formBody from "@fastify/formbody";
5
10
  import fastifyStatic from "@fastify/static";
6
11
  import Fastify from "fastify";
12
+ import { ProxyOAuthServerProvider } from "@modelcontextprotocol/sdk/server/auth/providers/proxyProvider.js";
13
+ import { createRemoteJWKSet, jwtVerify } from "jose";
7
14
  import { SSEServerTransport } from "@modelcontextprotocol/sdk/server/sse.js";
8
15
  import { StreamableHTTPServerTransport } from "@modelcontextprotocol/sdk/server/streamableHttp.js";
9
16
  import { McpServer, ResourceTemplate } from "@modelcontextprotocol/sdk/server/mcp.js";
@@ -17,7 +24,7 @@ import { chromium } from "playwright";
17
24
  import { gfm } from "@joplin/turndown-plugin-gfm";
18
25
  import TurndownService from "turndown";
19
26
  import iconv from "iconv-lite";
20
- import fs from "node:fs/promises";
27
+ import fs$1 from "node:fs/promises";
21
28
  import * as mime from "mime-types";
22
29
  import axios from "axios";
23
30
  import { HeaderGenerator } from "header-generator";
@@ -25,7 +32,6 @@ import { initTRPC } from "@trpc/server";
25
32
  import { fastifyTRPCPlugin } from "@trpc/server/adapters/fastify";
26
33
  import { z as z$1 } from "zod";
27
34
  import { jsxs, jsx, Fragment } from "@kitajs/html/jsx-runtime";
28
- import fs$1, { readFileSync, existsSync } from "node:fs";
29
35
  import { unified } from "unified";
30
36
  import remarkParse from "remark-parse";
31
37
  import remarkGfm from "remark-gfm";
@@ -34,14 +40,13 @@ import DOMPurify from "dompurify";
34
40
  import { fileURLToPath, URL as URL$1 } from "node:url";
35
41
  import { StdioServerTransport } from "@modelcontextprotocol/sdk/server/stdio.js";
36
42
  import { createTRPCProxyClient, httpBatchLink } from "@trpc/client";
37
- import "env-paths";
38
43
  import "fuse.js";
39
44
  import "langchain/text_splitter";
40
45
  import "better-sqlite3";
41
46
  import "sqlite-vec";
42
47
  import { execSync } from "node:child_process";
43
48
  import { v4 } from "uuid";
44
- import "psl";
49
+ import psl from "psl";
45
50
  import { minimatch } from "minimatch";
46
51
  const LogLevel = {
47
52
  ERROR: 0,
@@ -101,10 +106,806 @@ const logger = {
101
106
  }
102
107
  }
103
108
  };
104
- const version = "1.19.0";
109
/**
 * Wrapper around the PostHog Node client that can be switched off entirely.
 *
 * When constructed with a falsy `enabled` value, or when the underlying
 * client fails to construct, every method becomes a no-op.
 */
class PostHogClient {
  client;
  enabled;

  // Client settings. The constructor forwards only host, flushAt,
  // flushInterval and disableGeoip to PostHog; the remaining keys are
  // declared here but not passed on by this class.
  static CONFIG = {
    host: "https://app.posthog.com",
    // Batch size: send after 20 events
    flushAt: 20,
    // Send after 10 seconds
    flushInterval: 1e4,
    // Do not collect IP geolocation
    disableGeoip: true,
    disableSessionRecording: true,
    disableSurveys: true,
    persistence: "memory"
  };

  /**
   * @param {boolean} enabled - Whether events should be sent at all.
   */
  constructor(enabled) {
    this.enabled = enabled;
    if (!this.enabled) {
      this.enabled = false;
      logger.debug("PostHog client disabled");
      return;
    }
    try {
      const { host, flushAt, flushInterval, disableGeoip } = PostHogClient.CONFIG;
      this.client = new PostHog("phc_g7pXZZdUiAQXdnwUANjloQWMvO0amEDTBaeDSWgXgrQ", {
        host,
        flushAt,
        flushInterval,
        disableGeoip
      });
      logger.debug("PostHog client initialized");
    } catch (error) {
      const reason = error instanceof Error ? error.message : "Unknown error";
      logger.debug(`PostHog initialization failed: ${reason}`);
      // A failed construction downgrades the wrapper to disabled.
      this.enabled = false;
    }
  }

  /**
   * Queue one event on the PostHog client. Errors are logged at debug
   * level and never propagate to the caller.
   */
  capture(distinctId, event, properties) {
    if (!(this.enabled && this.client)) return;
    try {
      const payload = { distinctId, event, properties };
      this.client.capture(payload);
      logger.debug(`PostHog event captured: ${event}`);
    } catch (error) {
      const reason = error instanceof Error ? error.message : "Unknown error";
      logger.debug(`PostHog capture error: ${reason}`);
    }
  }

  /**
   * Await the client's own shutdown(); failures are logged, not thrown.
   */
  async shutdown() {
    if (!this.client) return;
    try {
      await this.client.shutdown();
      logger.debug("PostHog client shutdown complete");
    } catch (error) {
      const reason = error instanceof Error ? error.message : "Unknown error";
      logger.debug(`PostHog shutdown error: ${reason}`);
    }
  }

  /**
   * True only when the wrapper is enabled and a client instance exists.
   */
  isEnabled() {
    return this.enabled && !!this.client;
  }
}
193
/**
 * Holds the context object of the single active session, if any.
 */
class SessionTracker {
  sessionContext;

  /**
   * Remember `context` as the active session.
   */
  startSession(context) {
    this.sessionContext = context;
  }

  /**
   * Clear the active session.
   *
   * @returns {{duration: number, interface: *}|null} Milliseconds elapsed
   *   since `context.startTime` plus the session's interface, or null when
   *   no session is active.
   */
  endSession() {
    const active = this.sessionContext;
    if (!active) return null;
    const summary = {
      duration: Date.now() - active.startTime.getTime(),
      interface: active.interface
    };
    this.sessionContext = void 0;
    return summary;
  }

  /**
   * @returns The active session context, or undefined.
   */
  getSessionContext() {
    return this.sessionContext;
  }

  /**
   * Merge the session context with `properties` (properties win on key
   * clashes) and stamp the result with the current ISO timestamp, which
   * overrides any `timestamp` key from either source.
   */
  getEnrichedProperties(properties = {}) {
    const enriched = { ...this.sessionContext, ...properties };
    enriched.timestamp = (/* @__PURE__ */ new Date()).toISOString();
    return enriched;
  }
}
228
/**
 * Process-wide telemetry switch, resolved once from the environment and
 * the command line when the instance is created.
 */
class TelemetryConfig {
  static instance;
  enabled;

  constructor() {
    this.enabled = this.determineEnabledState();
  }

  /**
   * Decide whether telemetry starts enabled.
   *
   * Telemetry is on by default. Either opt-out disables it; neither can
   * re-enable what the other disabled:
   *   - DOCS_MCP_TELEMETRY set to "false", "0", "off" or "no"
   *     (case-insensitive, surrounding whitespace ignored)
   *   - the `--no-telemetry` flag anywhere in process.argv
   *
   * @returns {boolean}
   */
  determineEnabledState() {
    const envValue = process.env.DOCS_MCP_TELEMETRY;
    if (typeof envValue === "string") {
      // Accept the common spellings of "off" so that an opt-out such as
      // DOCS_MCP_TELEMETRY=FALSE or =0 is honoured rather than ignored.
      const normalized = envValue.trim().toLowerCase();
      if (["false", "0", "off", "no"].includes(normalized)) {
        return false;
      }
    }
    return !process.argv.includes("--no-telemetry");
  }

  /** @returns {boolean} Current enabled state. */
  isEnabled() {
    return this.enabled;
  }

  /** Turn telemetry off for this instance. */
  disable() {
    this.enabled = false;
  }

  /** Turn telemetry on for this instance. */
  enable() {
    this.enabled = true;
  }

  /** @returns {TelemetryConfig} The lazily created shared instance. */
  static getInstance() {
    if (!TelemetryConfig.instance) {
      TelemetryConfig.instance = new TelemetryConfig();
    }
    return TelemetryConfig.instance;
  }
}
264
/**
 * Return the installation id stored in `<dataDir>/installation.id`,
 * creating the file with a fresh UUID when it is missing or blank.
 *
 * `dataDir` is DOCS_MCP_STORE_PATH when set, otherwise the env-paths data
 * directory for "docs-mcp-server". Any failure along the way falls back to
 * a fresh UUID that is not written anywhere.
 *
 * @returns {string}
 */
function generateInstallationId() {
  try {
    const storeDir = process.env.DOCS_MCP_STORE_PATH || envPaths("docs-mcp-server", { suffix: "" }).data;
    const idFile = path.join(storeDir, "installation.id");
    const storedId = fs.existsSync(idFile) ? fs.readFileSync(idFile, "utf8").trim() : "";
    if (storedId) {
      return storedId;
    }
    const freshId = randomUUID();
    fs.mkdirSync(storeDir, { recursive: true });
    fs.writeFileSync(idFile, freshId, "utf8");
    return freshId;
  } catch {
    return randomUUID();
  }
}
283
/**
 * Report the enabled state of the shared TelemetryConfig instance.
 *
 * @returns {boolean}
 */
function shouldEnableTelemetry() {
  const sharedConfig = TelemetryConfig.getInstance();
  return sharedConfig.isEnabled();
}
286
// Names of the telemetry events, keyed by their constant name.
var TelemetryEvent = {
  SESSION_STARTED: "session_started",
  SESSION_ENDED: "session_ended",
  APP_STARTED: "app_started",
  APP_SHUTDOWN: "app_shutdown",
  COMMAND_EXECUTED: "command_executed",
  TOOL_USED: "tool_used",
  HTTP_REQUEST_COMPLETED: "http_request_completed",
  PIPELINE_JOB_PROGRESS: "pipeline_job_progress",
  PIPELINE_JOB_COMPLETED: "pipeline_job_completed",
  DOCUMENT_PROCESSED: "document_processed",
  DOCUMENT_PROCESSING_FAILED: "document_processing_failed",
  ERROR_OCCURRED: "error_occurred"
};
301
/**
 * Facade that combines the PostHog client, the session tracker and the
 * installation id into a single event-tracking API.
 */
class Analytics {
  postHogClient;
  sessionTracker;
  enabled = true;
  distinctId;

  /**
   * @param {boolean} [enabled] - Explicit on/off switch; when omitted
   *   (null or undefined) the shared TelemetryConfig decides.
   */
  constructor(enabled) {
    this.enabled = enabled ?? TelemetryConfig.getInstance().isEnabled();
    this.distinctId = generateInstallationId();
    this.postHogClient = new PostHogClient(this.enabled);
    this.sessionTracker = new SessionTracker();
    logger.debug(this.enabled ? "Analytics enabled" : "Analytics disabled");
  }

  /**
   * Store `context` as the active session and emit "session_started".
   * Does nothing when analytics is disabled.
   */
  startSession(context) {
    if (!this.enabled) return;
    this.sessionTracker.startSession(context);
    this.track("session_started", {
      interface: context.interface,
      version: context.version,
      platform: context.platform,
      sessionDurationTarget: context.interface === "cli" ? "short" : "long",
      authEnabled: context.authEnabled,
      readOnly: context.readOnly,
      // A context without servicesEnabled must not make telemetry throw
      // into the caller; report zero services instead.
      servicesCount: context.servicesEnabled?.length ?? 0
    });
  }

  /**
   * Send `event` with `properties` merged over the session context.
   * Does nothing when analytics is disabled.
   */
  track(event, properties = {}) {
    if (!this.enabled) return;
    const eventProperties = this.sessionTracker.getEnrichedProperties(properties);
    this.postHogClient.capture(this.distinctId, event, eventProperties);
  }

  /**
   * End the active session, if any, and emit "session_ended" with its
   * duration in milliseconds.
   */
  endSession() {
    if (!this.enabled) return;
    const sessionInfo = this.sessionTracker.endSession();
    if (!sessionInfo) return;
    this.track("session_ended", {
      durationMs: sessionInfo.duration,
      interface: sessionInfo.interface
    });
  }

  /**
   * Await shutdown of the underlying PostHog wrapper.
   */
  async shutdown() {
    await this.postHogClient.shutdown();
  }

  /**
   * True when both this facade and the PostHog wrapper are enabled.
   */
  isEnabled() {
    return this.enabled && this.postHogClient.isEnabled();
  }

  /**
   * @returns The active session context from the tracker, if any.
   */
  getSessionContext() {
    return this.sessionTracker.getSessionContext();
  }
}
373
+ const analytics = new Analytics();
374
/**
 * Run `operation`, report a "tool_used" event describing the outcome, and
 * hand the operation's result (or error) back to the caller unchanged.
 *
 * @param {string} toolName - Value reported as the event's `tool` property.
 * @param {() => Promise<*>} operation - The tool call to execute.
 * @param {(result: *) => object} [getProperties] - Optional extractor of
 *   extra event properties from a successful result.
 * @returns {Promise<*>} Whatever `operation` resolves to.
 * @throws Rethrows the error from `operation` after reporting it.
 */
async function trackTool(toolName, operation, getProperties) {
  const startTime = Date.now();
  let result;
  try {
    result = await operation();
  } catch (error) {
    analytics.track("tool_used", {
      tool: toolName,
      success: false,
      durationMs: Date.now() - startTime,
      errorType: error instanceof Error ? error.constructor.name : "UnknownError"
    });
    throw error;
  }
  // Property extraction is telemetry-only work. It runs outside the try
  // above so that a faulty extractor can neither fail a successful tool
  // call nor get it reported as a failure.
  let extraProperties = {};
  if (getProperties) {
    try {
      extraProperties = getProperties(result);
    } catch (error) {
      logger.debug(
        `Failed to extract telemetry properties for ${toolName}: ${error instanceof Error ? error.message : "Unknown error"}`
      );
    }
  }
  analytics.track("tool_used", {
    tool: toolName,
    success: true,
    durationMs: Date.now() - startTime,
    ...extraProperties
  });
  return result;
}
395
/**
 * Return the hostname part of `url`, or the sentinel "invalid-hostname"
 * when the value cannot be parsed as a URL.
 *
 * @param {string} url
 * @returns {string}
 */
function extractHostname(url) {
  let hostname = "invalid-hostname";
  try {
    ({ hostname } = new URL(url));
  } catch {
    // Unparseable input: keep the sentinel.
  }
  return hostname;
}
403
/**
 * Classify a URL or filesystem path by protocol.
 *
 * @param {string} urlOrPath
 * @returns {string} The URL scheme without the trailing colon, "file" for
 *   absolute POSIX paths and Windows drive paths, or "unknown".
 */
function extractProtocol(urlOrPath) {
  // Windows drive paths must be recognised before URL parsing: the URL
  // parser accepts "C:\dir" and "C:/dir" as URLs with scheme "c:", which
  // would report the drive letter as the protocol.
  if (/^[A-Za-z]:(?:[\\/]|$)/.test(urlOrPath)) {
    return "file";
  }
  try {
    const parsed = new URL(urlOrPath);
    return parsed.protocol.replace(":", "");
  } catch {
    return urlOrPath.startsWith("/") ? "file" : "unknown";
  }
}
414
/**
 * Derive shape metrics from a search query without keeping its text.
 *
 * @param {string} query
 * @returns {{length: number, wordCount: number, hasCodeTerms: boolean, hasSpecialChars: boolean}}
 *   `length` is the raw string length; `wordCount` counts
 *   whitespace-separated words (0 for an empty or blank query);
 *   `hasCodeTerms` flags common programming keywords; `hasSpecialChars`
 *   flags any character that is neither `\w` nor whitespace.
 */
function analyzeSearchQuery(query) {
  const trimmed = query.trim();
  return {
    length: query.length,
    // "".split(/\s+/) yields [""], so a blank query needs an explicit 0.
    wordCount: trimmed === "" ? 0 : trimmed.split(/\s+/).length,
    hasCodeTerms: /\b(function|class|import|export|const|let|var|def|async|await)\b/i.test(query),
    hasSpecialChars: /[^\w\s]/.test(query)
  };
}
422
/**
 * Redact URLs, file paths and credentials from an error message and cap
 * the result at 200 characters.
 *
 * Replacements run in order: http(s) URLs, file:// URLs, slash-prefixed
 * paths ending in a 2-4 letter extension, Windows drive paths, Bearer
 * tokens, `api_key=`/`api-key:` style values, and `token=`/`token:` values.
 *
 * @param {*} message - Normally a string; null/undefined become "" and
 *   other values are converted with String() so that the sanitizer itself
 *   cannot throw while an error is being reported.
 * @returns {string}
 */
function sanitizeErrorMessage(message) {
  const text = typeof message === "string" ? message : String(message ?? "");
  return text
    .replace(/https?:\/\/[^\s]+/gi, "[url]")
    .replace(/file:\/\/[^\s]+/gi, "[file-url]")
    .replace(/\/[^\s]*\.[a-z]{2,4}/gi, "[path]")
    .replace(/[A-Za-z]:\\[^\s]+/g, "[path]")
    .replace(/Bearer\s+[^\s]+/gi, "Bearer [token]")
    .replace(/api[_-]?key[=:]\s*[^\s]+/gi, "api_key=[redacted]")
    .replace(/token[=:]\s*[^\s]+/gi, "token=[redacted]")
    .substring(0, 200);
}
425
/**
 * Reduce a thrown value to a telemetry-safe summary.
 *
 * @param {*} error - Usually an Error. Thrown strings and null/undefined
 *   are tolerated so that reporting an error never throws a second one.
 * @returns {{type: string, message: string, hasStack: boolean}}
 *   `type` is the constructor name ("UnknownError" when there is none),
 *   `message` is the sanitized message (a thrown string is used as its
 *   own message), `hasStack` tells whether a stack was present.
 */
function sanitizeError(error) {
  const rawMessage = error?.message ?? (typeof error === "string" ? error : "");
  return {
    type: error?.constructor?.name ?? "UnknownError",
    message: sanitizeErrorMessage(String(rawMessage)),
    hasStack: Boolean(error?.stack)
  };
}
432
/**
 * List the flag names present in an argv array.
 *
 * Every argument that starts with "-" counts as a flag. For the inline
 * form `--flag=value` only the part before the first "=" is kept, so the
 * value (which may be a secret or a private URL) is not part of the
 * result.
 *
 * @param {string[]} argv
 * @returns {string[]} Flag names in their original order.
 */
function extractCliFlags(argv) {
  return argv
    .filter((arg) => arg.startsWith("-"))
    .map((flag) => {
      const separator = flag.indexOf("=");
      return separator === -1 ? flag : flag.slice(0, separator);
    });
}
435
// Version string reported by this bundle.
const version = "1.21.0";

// Minimal package descriptor exposing only the version.
const packageJson = { version };

/**
 * @returns {string} The version recorded in `packageJson`.
 */
function getPackageVersion() {
  const { version: current } = packageJson;
  return current;
}
442
/**
 * Build the session context for one CLI invocation.
 *
 * @param {string} command - Command name; a falsy value is reported as
 *   "unknown".
 * @param {{authEnabled?: boolean, readOnly?: boolean}} [options] - Copied
 *   through as-is; both fields are undefined when options is omitted.
 * @returns {object} Context with a fresh sessionId and the current time as
 *   startTime.
 */
function createCliSession(command, options) {
  const session = {
    sessionId: randomUUID(),
    interface: "cli",
    startTime: /* @__PURE__ */ new Date(),
    version: getPackageVersion(),
    platform: process.platform,
    nodeVersion: process.version
  };
  session.command = command || "unknown";
  session.authEnabled = options?.authEnabled;
  session.readOnly = options?.readOnly;
  // The CLI session always reports the "worker" service.
  session.servicesEnabled = ["worker"];
  return session;
}
457
/**
 * Build the session context for an MCP server session.
 *
 * @param {object} options
 * @param {string} [options.protocol] - Falsy values become "stdio".
 * @param {*} [options.transport] - Copied through unchanged.
 * @param {boolean} [options.authEnabled] - null/undefined become false.
 * @param {boolean} [options.readOnly] - null/undefined become false.
 * @param {string[]} [options.servicesEnabled] - null/undefined become ["mcp"].
 * @returns {object} Context with a fresh sessionId and the current time as
 *   startTime.
 */
function createMcpSession(options) {
  const session = {
    sessionId: randomUUID(),
    interface: "mcp",
    startTime: /* @__PURE__ */ new Date(),
    version: getPackageVersion(),
    platform: process.platform,
    nodeVersion: process.version
  };
  session.protocol = options.protocol || "stdio";
  session.transport = options.transport;
  session.authEnabled = options.authEnabled ?? false;
  session.readOnly = options.readOnly ?? false;
  session.servicesEnabled = options.servicesEnabled ?? ["mcp"];
  return session;
}
472
/**
 * Create a small service object whose three methods delegate to the
 * shared `analytics` instance.
 *
 * @returns {{startSession: Function, endSession: Function, shutdown: Function}}
 */
function createTelemetryService() {
  const startSession = (context) => {
    analytics.startSession(context);
  };
  const endSession = () => {
    analytics.endSession();
  };
  const shutdown = async () => {
    await analytics.shutdown();
  };
  return { startSession, endSession, shutdown };
}

// Service instance created when this module is evaluated.
const telemetryService = createTelemetryService();
486
/**
 * Build a request hook that authenticates requests through `authManager`.
 *
 * The hook always stores the auth context on `request.auth`. When auth is
 * enabled and the request is not authenticated it replies 401 with a
 * WWW-Authenticate challenge: "unauthorized" when no Authorization header
 * was sent, "invalid_token" otherwise. Any error thrown while
 * authenticating is also answered with a 401 "invalid_token".
 *
 * @param {{authConfig: {enabled: boolean}, createAuthContext: Function}} authManager
 * @returns {(request: object, reply: object) => Promise<void>}
 */
function createAuthMiddleware(authManager) {
  // Send a 401 carrying the given challenge header and JSON error body.
  const deny = (reply, challenge, error, error_description) => {
    reply.status(401).header("WWW-Authenticate", challenge).send({
      error,
      error_description
    });
  };

  return async (request, reply) => {
    try {
      const header = request.headers.authorization;
      const authContext = await authManager.createAuthContext(header || "", request);
      request.auth = authContext;

      if (!authManager.authConfig.enabled) {
        logger.debug("Authentication disabled, allowing request");
        return;
      }

      if (authContext.authenticated) {
        logger.debug(
          `Authentication successful for subject: ${authContext.subject || "anonymous"}`
        );
        return;
      }

      if (request.headers.authorization) {
        logger.debug("Token validation failed");
        deny(
          reply,
          'Bearer realm="MCP Server", error="invalid_token"',
          "invalid_token",
          "The access token is invalid"
        );
      } else {
        logger.debug("Missing authorization header");
        deny(reply, 'Bearer realm="MCP Server"', "unauthorized", "Authorization header required");
      }
    } catch (error) {
      const message = error instanceof Error ? error.message : "Authentication failed";
      logger.debug(`Authentication error: ${message}`);
      deny(
        reply,
        'Bearer realm="MCP Server", error="invalid_token"',
        "invalid_token",
        "Token validation failed"
      );
    }
  };
}
533
/**
 * OAuth2 proxy authentication: discovers the provider's endpoints, exposes
 * proxy OAuth routes on the HTTP server and validates bearer tokens either
 * as JWTs (via JWKS) or through the provider's userinfo endpoint.
 *
 * Token and Authorization header contents are never written to the log.
 */
class ProxyAuthManager {
  /**
   * @param {{enabled: boolean, issuerUrl?: string, audience?: string}} config
   */
  constructor(config) {
    this.config = config;
  }
  proxyProvider = null;
  discoveredEndpoints = null;
  jwks = null;

  /**
   * Get the authentication configuration
   */
  get authConfig() {
    return this.config;
  }

  /**
   * Join the issuer URL with a path that starts with "/". Trailing slashes
   * on the issuer are dropped first, so an issuer such as
   * "https://idp.example/" does not yield "//.well-known/...".
   */
  #issuerUrlFor(pathname) {
    return `${String(this.config.issuerUrl).replace(/\/+$/, "")}${pathname}`;
  }

  /**
   * Endpoints for the route handlers: the set cached by initialize() when
   * present, otherwise a fresh discovery. Avoids repeating network
   * discovery on every OAuth request.
   */
  async #resolveEndpoints() {
    return this.discoveredEndpoints ?? this.discoverEndpoints();
  }

  /**
   * Initialize the proxy auth manager with the configured OAuth provider.
   * No-op when auth is disabled.
   *
   * @throws {Error} When issuerUrl or audience is missing, or when
   *   discovery or provider setup fails.
   */
  async initialize() {
    if (!this.config.enabled) {
      logger.debug("Authentication disabled, skipping proxy auth manager initialization");
      return;
    }
    if (!this.config.issuerUrl || !this.config.audience) {
      throw new Error("Issuer URL and Audience are required when auth is enabled");
    }
    try {
      logger.info("🔐 Initializing OAuth2 proxy authentication...");
      this.discoveredEndpoints = await this.discoverEndpoints();
      if (this.discoveredEndpoints.jwksUri) {
        this.jwks = createRemoteJWKSet(new URL(this.discoveredEndpoints.jwksUri));
        logger.debug(`JWKS configured from: ${this.discoveredEndpoints.jwksUri}`);
      }
      const capabilities = [];
      if (this.discoveredEndpoints.jwksUri) capabilities.push("JWT validation via JWKS");
      if (this.discoveredEndpoints.userinfoUrl)
        capabilities.push("opaque token validation via userinfo");
      logger.debug(`Token validation capabilities: ${capabilities.join(", ")}`);
      if (capabilities.length === 0) {
        logger.warn(
          "⚠️  No token validation mechanisms available - authentication may fail"
        );
      }
      this.proxyProvider = new ProxyOAuthServerProvider({
        endpoints: {
          authorizationUrl: this.discoveredEndpoints.authorizationUrl,
          tokenUrl: this.discoveredEndpoints.tokenUrl,
          revocationUrl: this.discoveredEndpoints.revocationUrl,
          registrationUrl: this.discoveredEndpoints.registrationUrl
        },
        verifyAccessToken: this.verifyAccessToken.bind(this),
        getClient: this.getClient.bind(this)
      });
      logger.info("✅ OAuth2 proxy authentication initialized successfully");
    } catch (error) {
      const message = error instanceof Error ? error.message : "Unknown error";
      logger.error(`❌ Failed to initialize OAuth2 proxy authentication: ${message}`);
      throw new Error(`Proxy authentication initialization failed: ${message}`);
    }
  }

  /**
   * Register OAuth2 endpoints on the Fastify server: two metadata
   * documents plus authorize/token/revoke/register routes that forward to
   * the provider's endpoints.
   *
   * @param {object} server - Fastify instance.
   * @param {URL} baseUrl - Public base URL; its origin is advertised in the
   *   authorization-server metadata.
   * @throws {Error} When initialize() has not created the proxy provider.
   */
  registerRoutes(server, baseUrl) {
    if (!this.proxyProvider) {
      throw new Error("Proxy provider not initialized");
    }
    server.get("/.well-known/oauth-authorization-server", async (_request, reply) => {
      const metadata = {
        issuer: baseUrl.origin,
        authorization_endpoint: `${baseUrl.origin}/oauth/authorize`,
        token_endpoint: `${baseUrl.origin}/oauth/token`,
        revocation_endpoint: `${baseUrl.origin}/oauth/revoke`,
        registration_endpoint: `${baseUrl.origin}/oauth/register`,
        scopes_supported: ["profile", "email"],
        response_types_supported: ["code"],
        grant_types_supported: ["authorization_code", "refresh_token"],
        token_endpoint_auth_methods_supported: [
          "client_secret_basic",
          "client_secret_post",
          "none"
        ],
        code_challenge_methods_supported: ["S256"]
      };
      reply.type("application/json").send(metadata);
    });
    server.get("/.well-known/oauth-protected-resource", async (request, reply) => {
      const baseUrl2 = `${request.protocol}://${request.headers.host}`;
      const metadata = {
        resource: `${baseUrl2}/sse`,
        authorization_servers: [this.config.issuerUrl],
        scopes_supported: ["profile", "email"],
        bearer_methods_supported: ["header"],
        resource_name: "Documentation MCP Server",
        resource_documentation: "https://github.com/arabold/docs-mcp-server#readme",
        // Enhanced metadata for better discoverability
        resource_server_metadata_url: `${baseUrl2}/.well-known/oauth-protected-resource`,
        authorization_server_metadata_url: this.#issuerUrlFor("/.well-known/openid-configuration"),
        jwks_uri: this.#issuerUrlFor("/.well-known/jwks.json"),
        // Supported MCP transports
        mcp_transports: [
          {
            transport: "sse",
            endpoint: `${baseUrl2}/sse`,
            description: "Server-Sent Events transport"
          },
          {
            transport: "http",
            endpoint: `${baseUrl2}/mcp`,
            description: "Streaming HTTP transport"
          }
        ]
      };
      reply.type("application/json").send(metadata);
    });
    server.get("/oauth/authorize", async (request, reply) => {
      const endpoints = await this.#resolveEndpoints();
      const params = new URLSearchParams(request.query);
      // Default the resource parameter to this server's SSE endpoint.
      if (!params.has("resource")) {
        const resourceUrl = `${request.protocol}://${request.headers.host}/sse`;
        params.set("resource", resourceUrl);
      }
      const redirectUrl = `${endpoints.authorizationUrl}?${params.toString()}`;
      reply.redirect(redirectUrl);
    });
    server.post("/oauth/token", async (request, reply) => {
      const endpoints = await this.#resolveEndpoints();
      const tokenBody = new URLSearchParams(request.body);
      if (!tokenBody.has("resource")) {
        const resourceUrl = `${request.protocol}://${request.headers.host}/sse`;
        tokenBody.set("resource", resourceUrl);
      }
      const response = await fetch(endpoints.tokenUrl, {
        method: "POST",
        headers: {
          "Content-Type": "application/x-www-form-urlencoded"
        },
        body: tokenBody.toString()
      });
      const data = await response.json();
      reply.status(response.status).type("application/json").send(data);
    });
    server.post("/oauth/revoke", async (request, reply) => {
      const endpoints = await this.#resolveEndpoints();
      if (endpoints.revocationUrl) {
        const response = await fetch(endpoints.revocationUrl, {
          method: "POST",
          headers: {
            "Content-Type": "application/x-www-form-urlencoded"
          },
          body: new URLSearchParams(request.body).toString()
        });
        reply.status(response.status).send();
      } else {
        reply.status(404).send({ error: "Revocation not supported" });
      }
    });
    server.post("/oauth/register", async (request, reply) => {
      const endpoints = await this.#resolveEndpoints();
      if (endpoints.registrationUrl) {
        const response = await fetch(endpoints.registrationUrl, {
          method: "POST",
          headers: {
            "Content-Type": "application/json"
          },
          body: JSON.stringify(request.body)
        });
        const data = await response.json();
        reply.status(response.status).type("application/json").send(data);
      } else {
        reply.status(404).send({ error: "Dynamic client registration not supported" });
      }
    });
    logger.debug("OAuth2 endpoints registered on Fastify server");
  }

  /**
   * Discover OAuth endpoints from the authorization server.
   * Tries the oauth-authorization-server metadata document first (adding
   * the userinfo endpoint from the OIDC document when available) and falls
   * back to the openid-configuration document.
   *
   * @throws {Error} When neither document can be fetched successfully.
   */
  async discoverEndpoints() {
    const oauthDiscoveryUrl = this.#issuerUrlFor("/.well-known/oauth-authorization-server");
    try {
      const oauthResponse = await fetch(oauthDiscoveryUrl);
      if (oauthResponse.ok) {
        const config2 = await oauthResponse.json();
        logger.debug(
          `Successfully discovered OAuth2 endpoints from: ${oauthDiscoveryUrl}`
        );
        const userinfoEndpoint = await this.discoverUserinfoEndpoint();
        if (userinfoEndpoint) {
          config2.userinfo_endpoint = userinfoEndpoint;
        }
        return this.buildEndpointsFromConfig(config2);
      }
    } catch (error) {
      logger.debug(`OAuth2 discovery failed: ${error}, trying OIDC discovery`);
    }
    const oidcDiscoveryUrl = this.#issuerUrlFor("/.well-known/openid-configuration");
    const oidcResponse = await fetch(oidcDiscoveryUrl);
    if (!oidcResponse.ok) {
      throw new Error(
        `Failed to fetch configuration from both ${oauthDiscoveryUrl} and ${oidcDiscoveryUrl}`
      );
    }
    const config = await oidcResponse.json();
    logger.debug(`Successfully discovered OIDC endpoints from: ${oidcDiscoveryUrl}`);
    return this.buildEndpointsFromConfig(config);
  }

  /**
   * Try to discover the userinfo endpoint for opaque token validation.
   *
   * @returns {Promise<string|null>} The endpoint, or null when the OIDC
   *   document is unavailable or does not list one.
   */
  async discoverUserinfoEndpoint() {
    try {
      const oidcDiscoveryUrl = this.#issuerUrlFor("/.well-known/openid-configuration");
      const response = await fetch(oidcDiscoveryUrl);
      if (response.ok) {
        const config = await response.json();
        return config.userinfo_endpoint || null;
      }
    } catch (error) {
      logger.debug(`Failed to fetch userinfo endpoint: ${error}`);
    }
    return null;
  }

  /**
   * Map a discovery document's snake_case fields to the internal
   * endpoint record.
   */
  buildEndpointsFromConfig(config) {
    return {
      authorizationUrl: config.authorization_endpoint,
      tokenUrl: config.token_endpoint,
      revocationUrl: config.revocation_endpoint,
      registrationUrl: config.registration_endpoint,
      jwksUri: config.jwks_uri,
      userinfoUrl: config.userinfo_endpoint
    };
  }

  /**
   * Resource URLs of this server as seen by `request`: the SSE endpoint,
   * the streaming HTTP endpoint and the server root.
   */
  getSupportedResources(request) {
    const baseUrl = `${request.protocol}://${request.headers.host}`;
    return [`${baseUrl}/sse`, `${baseUrl}/mcp`, `${baseUrl}`];
  }

  /**
   * Verify an access token: JWT validation against the JWKS first (when
   * configured), then the userinfo endpoint as a fallback.
   *
   * @param {string} token - Bearer token; never logged.
   * @param {object} [request] - Used only to log the supported resources.
   * @returns {Promise<{token: string, clientId: string, scopes: string[]}>}
   * @throws {Error} "Invalid access token" when every strategy fails.
   */
  async verifyAccessToken(token, request) {
    // Deliberately no token content here: even a prefix is credential
    // material and must stay out of the logs.
    logger.debug("Attempting to verify access token");
    if (this.jwks) {
      try {
        logger.debug("Attempting JWT validation with JWKS...");
        const { payload } = await jwtVerify(token, this.jwks, {
          issuer: this.config.issuerUrl,
          audience: this.config.audience
        });
        logger.debug(
          `JWT validation successful. Subject: ${payload.sub}, Audience: ${payload.aud}`
        );
        if (!payload.sub) {
          throw new Error("JWT payload missing subject claim");
        }
        return {
          token,
          clientId: payload.sub,
          // Full access for all authenticated users
          scopes: ["*"]
        };
      } catch (error) {
        const errorMessage = error instanceof Error ? error.message : "Unknown error";
        logger.debug(
          `JWT validation failed: ${errorMessage}, trying userinfo fallback...`
        );
      }
    }
    if (this.discoveredEndpoints?.userinfoUrl) {
      try {
        logger.debug("Attempting userinfo endpoint validation...");
        const response = await fetch(this.discoveredEndpoints.userinfoUrl, {
          method: "GET",
          headers: {
            Authorization: `Bearer ${token}`,
            Accept: "application/json"
          }
        });
        if (!response.ok) {
          throw new Error(
            `Userinfo request failed: ${response.status} ${response.statusText}`
          );
        }
        const userinfo = await response.json();
        logger.debug(
          `Token validation successful. User: ${userinfo.sub}, Email: ${userinfo.email}`
        );
        if (!userinfo.sub) {
          throw new Error("Userinfo response missing subject");
        }
        if (request) {
          const supportedResources = this.getSupportedResources(request);
          logger.debug(`Supported resources: ${JSON.stringify(supportedResources)}`);
        }
        return {
          token,
          clientId: userinfo.sub,
          // Full access for all authenticated users
          scopes: ["*"]
        };
      } catch (error) {
        const errorMessage = error instanceof Error ? error.message : "Unknown error";
        logger.debug(`Userinfo validation failed: ${errorMessage}`);
      }
    }
    logger.debug("All token validation strategies exhausted");
    throw new Error("Invalid access token");
  }

  /**
   * Get client information for the given client ID; passed to the proxy
   * provider as its getClient callback. The single redirect URI is derived
   * from the configured audience.
   */
  async getClient(clientId) {
    return {
      client_id: clientId,
      redirect_uris: [`${this.config.audience}/callback`]
    };
  }

  /**
   * Create an authentication context from an Authorization header value.
   * A valid bearer token grants the wildcard scope; every failure
   * (disabled auth, malformed header, invalid token) yields an
   * unauthenticated context instead of throwing.
   *
   * @param {string} authorization - Raw header value; never logged.
   * @param {object} [request] - Forwarded to verifyAccessToken.
   * @returns {Promise<{authenticated: boolean, scopes: Set<string>, subject?: string}>}
   */
  async createAuthContext(authorization, request) {
    if (!this.config.enabled) {
      return {
        authenticated: false,
        scopes: /* @__PURE__ */ new Set()
      };
    }
    try {
      // The header carries the credential, so only its presence is logged.
      logger.debug("Processing authorization header");
      const match = authorization.match(/^Bearer\s+(.+)$/i);
      if (!match) {
        logger.debug("Authorization header does not match Bearer token pattern");
        throw new Error("Invalid authorization header format");
      }
      const token = match[1];
      const authInfo = await this.verifyAccessToken(token, request);
      logger.debug(`Authentication successful for client: ${authInfo.clientId}`);
      return {
        authenticated: true,
        // Full access for authenticated users
        scopes: /* @__PURE__ */ new Set(["*"]),
        subject: authInfo.clientId
      };
    } catch (error) {
      const errorMessage = error instanceof Error ? error.message : "Unknown error";
      logger.debug(`Authentication failed: ${errorMessage}`);
      return {
        authenticated: false,
        scopes: /* @__PURE__ */ new Set()
      };
    }
  }
}
108
909
  var PipelineJobStatus = /* @__PURE__ */ ((PipelineJobStatus2) => {
109
910
  PipelineJobStatus2["QUEUED"] = "queued";
110
911
  PipelineJobStatus2["RUNNING"] = "running";
@@ -185,7 +986,7 @@ class ClearCompletedJobsTool {
185
986
  try {
186
987
  const clearedCount = await this.pipeline.clearCompletedJobs();
187
988
  const message = clearedCount > 0 ? `Successfully cleared ${clearedCount} completed job${clearedCount === 1 ? "" : "s"} from the queue.` : "No completed jobs to clear.";
188
- logger.debug(`[ClearCompletedJobsTool] ${message}`);
989
+ logger.debug(message);
189
990
  return {
190
991
  message,
191
992
  success: true,
@@ -193,7 +994,7 @@ class ClearCompletedJobsTool {
193
994
  };
194
995
  } catch (error) {
195
996
  const errorMessage = `Failed to clear completed jobs: ${error instanceof Error ? error.message : String(error)}`;
196
- logger.error(`❌ [ClearCompletedJobsTool] ${errorMessage}`);
997
+ logger.error(`❌ ${errorMessage}`);
197
998
  return {
198
999
  message: errorMessage,
199
1000
  success: false,
@@ -1139,7 +1940,7 @@ class FetchUrlTool {
1139
1940
  async execute(options) {
1140
1941
  const { url, scrapeMode = ScrapeMode.Auto, headers } = options;
1141
1942
  const canFetchResults = this.fetchers.map((f) => f.canFetch(url));
1142
- const fetcherIndex = canFetchResults.findIndex((result) => result === true);
1943
+ const fetcherIndex = canFetchResults.indexOf(true);
1143
1944
  if (fetcherIndex === -1) {
1144
1945
  throw new ToolError(
1145
1946
  `Invalid URL: ${url}. Must be an HTTP/HTTPS URL or a file:// URL.`,
@@ -1372,14 +2173,13 @@ class RemoveTool {
1372
2173
  this.pipeline = pipeline;
1373
2174
  }
1374
2175
  /**
1375
- * Executes the tool to remove the specified library version documents.
2176
+ * Executes the tool to remove the specified library version completely.
1376
2177
  * Aborts any QUEUED/RUNNING job for the same library+version before deleting.
2178
+ * Removes all documents, the version record, and the library if no other versions exist.
1377
2179
  */
1378
2180
  async execute(args) {
1379
2181
  const { library, version: version2 } = args;
1380
- logger.info(
1381
- `🗑️ Removing library: ${library}${version2 ? `, version: ${version2}` : " (unversioned)"}`
1382
- );
2182
+ logger.info(`🗑️ Removing library: ${library}${version2 ? `@${version2}` : ""}`);
1383
2183
  try {
1384
2184
  const allJobs = await this.pipeline.getJobs();
1385
2185
  const jobs = allJobs.filter(
@@ -1392,12 +2192,12 @@ class RemoveTool {
1392
2192
  await this.pipeline.cancelJob(job.id);
1393
2193
  await this.pipeline.waitForJobCompletion(job.id);
1394
2194
  }
1395
- await this.documentManagementService.removeAllDocuments(library, version2);
1396
- const message = `Successfully removed documents for ${library}${version2 ? `@${version2}` : " (unversioned)"}.`;
2195
+ await this.documentManagementService.removeVersion(library, version2);
2196
+ const message = `Successfully removed ${library}${version2 ? `@${version2}` : ""}.`;
1397
2197
  logger.info(`✅ ${message}`);
1398
2198
  return { message };
1399
2199
  } catch (error) {
1400
- const errorMessage = `Failed to remove documents for ${library}${version2 ? `@${version2}` : " (unversioned)"}: ${error instanceof Error ? error.message : String(error)}`;
2200
+ const errorMessage = `Failed to remove ${library}${version2 ? `@${version2}` : ""}: ${error instanceof Error ? error.message : String(error)}`;
1401
2201
  logger.error(`❌ Error removing library: ${errorMessage}`);
1402
2202
  throw new ToolError(errorMessage, this.constructor.name);
1403
2203
  }
@@ -1546,7 +2346,7 @@ function createError(text) {
1546
2346
  isError: true
1547
2347
  };
1548
2348
  }
1549
- function createMcpServerInstance(tools) {
2349
+ function createMcpServerInstance(tools, readOnly = false) {
1550
2350
  const server = new McpServer(
1551
2351
  {
1552
2352
  name: "docs-mcp-server",
@@ -1560,54 +2360,56 @@ function createMcpServerInstance(tools) {
1560
2360
  }
1561
2361
  }
1562
2362
  );
1563
- server.tool(
1564
- "scrape_docs",
1565
- "Scrape and index documentation from a URL for a library. Use this tool to index a new library or a new version.",
1566
- {
1567
- url: z.string().url().describe("Documentation root URL to scrape."),
1568
- library: z.string().describe("Library name."),
1569
- version: z.string().optional().describe("Library version (optional)."),
1570
- maxPages: z.number().optional().default(DEFAULT_MAX_PAGES).describe(`Maximum number of pages to scrape (default: ${DEFAULT_MAX_PAGES}).`),
1571
- maxDepth: z.number().optional().default(DEFAULT_MAX_DEPTH$1).describe(`Maximum navigation depth (default: ${DEFAULT_MAX_DEPTH$1}).`),
1572
- scope: z.enum(["subpages", "hostname", "domain"]).optional().default("subpages").describe("Crawling boundary: 'subpages', 'hostname', or 'domain'."),
1573
- followRedirects: z.boolean().optional().default(true).describe("Follow HTTP redirects (3xx responses).")
1574
- },
1575
- {
1576
- title: "Scrape New Library Documentation",
1577
- destructiveHint: true,
1578
- // replaces existing docs
1579
- openWorldHint: true
1580
- // requires internet access
1581
- },
1582
- async ({ url, library, version: version2, maxPages, maxDepth, scope, followRedirects }) => {
1583
- try {
1584
- const result = await tools.scrape.execute({
1585
- url,
1586
- library,
1587
- version: version2,
1588
- waitForCompletion: false,
1589
- // Don't wait for completion
1590
- // onProgress: undefined, // Explicitly undefined or omitted
1591
- options: {
1592
- maxPages,
1593
- maxDepth,
1594
- scope,
1595
- followRedirects
2363
+ if (!readOnly) {
2364
+ server.tool(
2365
+ "scrape_docs",
2366
+ "Scrape and index documentation from a URL for a library. Use this tool to index a new library or a new version.",
2367
+ {
2368
+ url: z.string().url().describe("Documentation root URL to scrape."),
2369
+ library: z.string().describe("Library name."),
2370
+ version: z.string().optional().describe("Library version (optional)."),
2371
+ maxPages: z.number().optional().default(DEFAULT_MAX_PAGES).describe(`Maximum number of pages to scrape (default: ${DEFAULT_MAX_PAGES}).`),
2372
+ maxDepth: z.number().optional().default(DEFAULT_MAX_DEPTH$1).describe(`Maximum navigation depth (default: ${DEFAULT_MAX_DEPTH$1}).`),
2373
+ scope: z.enum(["subpages", "hostname", "domain"]).optional().default("subpages").describe("Crawling boundary: 'subpages', 'hostname', or 'domain'."),
2374
+ followRedirects: z.boolean().optional().default(true).describe("Follow HTTP redirects (3xx responses).")
2375
+ },
2376
+ {
2377
+ title: "Scrape New Library Documentation",
2378
+ destructiveHint: true,
2379
+ // replaces existing docs
2380
+ openWorldHint: true
2381
+ // requires internet access
2382
+ },
2383
+ async ({ url, library, version: version2, maxPages, maxDepth, scope, followRedirects }) => {
2384
+ try {
2385
+ const result = await tools.scrape.execute({
2386
+ url,
2387
+ library,
2388
+ version: version2,
2389
+ waitForCompletion: false,
2390
+ // Don't wait for completion
2391
+ // onProgress: undefined, // Explicitly undefined or omitted
2392
+ options: {
2393
+ maxPages,
2394
+ maxDepth,
2395
+ scope,
2396
+ followRedirects
2397
+ }
2398
+ });
2399
+ if ("jobId" in result) {
2400
+ return createResponse(`🚀 Scraping job started with ID: ${result.jobId}.`);
1596
2401
  }
1597
- });
1598
- if ("jobId" in result) {
1599
- return createResponse(`🚀 Scraping job started with ID: ${result.jobId}.`);
2402
+ return createResponse(
2403
+ `Scraping finished immediately (unexpectedly) with ${result.pagesScraped} pages.`
2404
+ );
2405
+ } catch (error) {
2406
+ return createError(
2407
+ `Failed to scrape documentation: ${error instanceof Error ? error.message : String(error)}`
2408
+ );
1600
2409
  }
1601
- return createResponse(
1602
- `Scraping finished immediately (unexpectedly) with ${result.pagesScraped} pages.`
1603
- );
1604
- } catch (error) {
1605
- return createError(
1606
- `Failed to scrape documentation: ${error instanceof Error ? error.message : String(error)}`
1607
- );
1608
2410
  }
1609
- }
1610
- );
2411
+ );
2412
+ }
1611
2413
  server.tool(
1612
2414
  "search_docs",
1613
2415
  'Search up-to-date documentation for a library or package. Examples:\n\n- {library: "react", query: "hooks lifecycle"} -> matches latest version of React\n- {library: "react", version: "18.0.0", query: "hooks lifecycle"} -> matches React 18.0.0 or earlier\n- {library: "typescript", version: "5.x", query: "ReturnType example"} -> any TypeScript 5.x.x version\n- {library: "typescript", version: "5.2.x", query: "ReturnType example"} -> any TypeScript 5.2.x version',
@@ -1728,24 +2530,25 @@ ${result.libraries.map((lib) => `- ${lib.name}`).join("\n")}`
1728
2530
  }
1729
2531
  }
1730
2532
  );
1731
- server.tool(
1732
- "list_jobs",
1733
- "List all indexing jobs. Optionally filter by status.",
1734
- {
1735
- status: z.enum(["queued", "running", "completed", "failed", "cancelling", "cancelled"]).optional().describe("Filter jobs by status (optional).")
1736
- },
1737
- {
1738
- title: "List Indexing Jobs",
1739
- readOnlyHint: true,
1740
- destructiveHint: false
1741
- },
1742
- async ({ status }) => {
1743
- try {
1744
- const result = await tools.listJobs.execute({
1745
- status
1746
- });
1747
- const formattedJobs = result.jobs.map(
1748
- (job) => `- ID: ${job.id}
2533
+ if (!readOnly) {
2534
+ server.tool(
2535
+ "list_jobs",
2536
+ "List all indexing jobs. Optionally filter by status.",
2537
+ {
2538
+ status: z.enum(["queued", "running", "completed", "failed", "cancelling", "cancelled"]).optional().describe("Filter jobs by status (optional).")
2539
+ },
2540
+ {
2541
+ title: "List Indexing Jobs",
2542
+ readOnlyHint: true,
2543
+ destructiveHint: false
2544
+ },
2545
+ async ({ status }) => {
2546
+ try {
2547
+ const result = await tools.listJobs.execute({
2548
+ status
2549
+ });
2550
+ const formattedJobs = result.jobs.map(
2551
+ (job) => `- ID: ${job.id}
1749
2552
  Status: ${job.status}
1750
2553
  Library: ${job.library}
1751
2554
  Version: ${job.version}
@@ -1753,100 +2556,101 @@ ${result.libraries.map((lib) => `- ${lib.name}`).join("\n")}`
1753
2556
  Started: ${job.startedAt}` : ""}${job.finishedAt ? `
1754
2557
  Finished: ${job.finishedAt}` : ""}${job.error ? `
1755
2558
  Error: ${job.error}` : ""}`
1756
- ).join("\n\n");
1757
- return createResponse(
1758
- result.jobs.length > 0 ? `Current Jobs:
2559
+ ).join("\n\n");
2560
+ return createResponse(
2561
+ result.jobs.length > 0 ? `Current Jobs:
1759
2562
 
1760
2563
  ${formattedJobs}` : "No jobs found."
1761
- );
1762
- } catch (error) {
1763
- return createError(
1764
- `Failed to list jobs: ${error instanceof Error ? error.message : String(error)}`
1765
- );
1766
- }
1767
- }
1768
- );
1769
- server.tool(
1770
- "get_job_info",
1771
- "Get details for a specific indexing job. Use the 'list_jobs' tool to find the job ID.",
1772
- {
1773
- jobId: z.string().uuid().describe("Job ID to query.")
1774
- },
1775
- {
1776
- title: "Get Indexing Job Info",
1777
- readOnlyHint: true,
1778
- destructiveHint: false
1779
- },
1780
- async ({ jobId }) => {
1781
- try {
1782
- const result = await tools.getJobInfo.execute({ jobId });
1783
- if (!result.job) {
1784
- return createError(`Job with ID ${jobId} not found.`);
2564
+ );
2565
+ } catch (error) {
2566
+ return createError(
2567
+ `Failed to list jobs: ${error instanceof Error ? error.message : String(error)}`
2568
+ );
1785
2569
  }
1786
- const job = result.job;
1787
- const formattedJob = `- ID: ${job.id}
2570
+ }
2571
+ );
2572
+ server.tool(
2573
+ "get_job_info",
2574
+ "Get details for a specific indexing job. Use the 'list_jobs' tool to find the job ID.",
2575
+ {
2576
+ jobId: z.string().uuid().describe("Job ID to query.")
2577
+ },
2578
+ {
2579
+ title: "Get Indexing Job Info",
2580
+ readOnlyHint: true,
2581
+ destructiveHint: false
2582
+ },
2583
+ async ({ jobId }) => {
2584
+ try {
2585
+ const result = await tools.getJobInfo.execute({ jobId });
2586
+ if (!result.job) {
2587
+ return createError(`Job with ID ${jobId} not found.`);
2588
+ }
2589
+ const job = result.job;
2590
+ const formattedJob = `- ID: ${job.id}
1788
2591
  Status: ${job.status}
1789
2592
  Library: ${job.library}@${job.version}
1790
2593
  Created: ${job.createdAt}${job.startedAt ? `
1791
2594
  Started: ${job.startedAt}` : ""}${job.finishedAt ? `
1792
2595
  Finished: ${job.finishedAt}` : ""}${job.error ? `
1793
2596
  Error: ${job.error}` : ""}`;
1794
- return createResponse(`Job Info:
2597
+ return createResponse(`Job Info:
1795
2598
 
1796
2599
  ${formattedJob}`);
1797
- } catch (error) {
1798
- return createError(
1799
- `Failed to get job info for ${jobId}: ${error instanceof Error ? error.message : String(error)}`
1800
- );
2600
+ } catch (error) {
2601
+ return createError(
2602
+ `Failed to get job info for ${jobId}: ${error instanceof Error ? error.message : String(error)}`
2603
+ );
2604
+ }
2605
+ }
2606
+ );
2607
+ server.tool(
2608
+ "cancel_job",
2609
+ "Cancel a queued or running indexing job. Use the 'list_jobs' tool to find the job ID.",
2610
+ {
2611
+ jobId: z.string().uuid().describe("Job ID to cancel.")
2612
+ },
2613
+ {
2614
+ title: "Cancel Indexing Job",
2615
+ destructiveHint: true
2616
+ },
2617
+ async ({ jobId }) => {
2618
+ try {
2619
+ const result = await tools.cancelJob.execute({ jobId });
2620
+ if (result.success) {
2621
+ return createResponse(result.message);
2622
+ }
2623
+ return createError(result.message);
2624
+ } catch (error) {
2625
+ return createError(
2626
+ `Failed to cancel job ${jobId}: ${error instanceof Error ? error.message : String(error)}`
2627
+ );
2628
+ }
1801
2629
  }
1802
- }
1803
- );
1804
- server.tool(
1805
- "cancel_job",
1806
- "Cancel a queued or running indexing job. Use the 'list_jobs' tool to find the job ID.",
1807
- {
1808
- jobId: z.string().uuid().describe("Job ID to cancel.")
1809
- },
1810
- {
1811
- title: "Cancel Indexing Job",
1812
- destructiveHint: true
1813
- },
1814
- async ({ jobId }) => {
1815
- try {
1816
- const result = await tools.cancelJob.execute({ jobId });
1817
- if (result.success) {
2630
+ );
2631
+ server.tool(
2632
+ "remove_docs",
2633
+ "Remove indexed documentation for a library version. Use only if explicitly instructed.",
2634
+ {
2635
+ library: z.string().describe("Library name."),
2636
+ version: z.string().optional().describe("Library version (optional, removes unversioned if omitted).")
2637
+ },
2638
+ {
2639
+ title: "Remove Library Documentation",
2640
+ destructiveHint: true
2641
+ },
2642
+ async ({ library, version: version2 }) => {
2643
+ try {
2644
+ const result = await tools.remove.execute({ library, version: version2 });
1818
2645
  return createResponse(result.message);
2646
+ } catch (error) {
2647
+ return createError(
2648
+ `Failed to remove documents: ${error instanceof Error ? error.message : String(error)}`
2649
+ );
1819
2650
  }
1820
- return createError(result.message);
1821
- } catch (error) {
1822
- return createError(
1823
- `Failed to cancel job ${jobId}: ${error instanceof Error ? error.message : String(error)}`
1824
- );
1825
- }
1826
- }
1827
- );
1828
- server.tool(
1829
- "remove_docs",
1830
- "Remove indexed documentation for a library version. Use only if explicitly instructed.",
1831
- {
1832
- library: z.string().describe("Library name."),
1833
- version: z.string().optional().describe("Library version (optional, removes unversioned if omitted).")
1834
- },
1835
- {
1836
- title: "Remove Library Documentation",
1837
- destructiveHint: true
1838
- },
1839
- async ({ library, version: version2 }) => {
1840
- try {
1841
- const result = await tools.remove.execute({ library, version: version2 });
1842
- return createResponse(result.message);
1843
- } catch (error) {
1844
- return createError(
1845
- `Failed to remove documents: ${error instanceof Error ? error.message : String(error)}`
1846
- );
1847
2651
  }
1848
- }
1849
- );
2652
+ );
2653
+ }
1850
2654
  server.tool(
1851
2655
  "fetch_url",
1852
2656
  "Fetch a single URL and convert its content to Markdown. Use this tool to read the content of any web page.",
@@ -1910,74 +2714,76 @@ ${formattedJob}`);
1910
2714
  };
1911
2715
  }
1912
2716
  );
1913
- server.resource(
1914
- "jobs",
1915
- "docs://jobs",
1916
- {
1917
- description: "List indexing jobs, optionally filtering by status.",
1918
- mimeType: "application/json"
1919
- },
1920
- async (uri) => {
1921
- const statusParam = uri.searchParams.get("status");
1922
- let statusFilter;
1923
- if (statusParam) {
1924
- const validation = z.nativeEnum(PipelineJobStatus).safeParse(statusParam);
1925
- if (validation.success) {
1926
- statusFilter = validation.data;
1927
- } else {
1928
- logger.warn(`⚠️ Invalid status parameter received: ${statusParam}`);
2717
+ if (!readOnly) {
2718
+ server.resource(
2719
+ "jobs",
2720
+ "docs://jobs",
2721
+ {
2722
+ description: "List indexing jobs, optionally filtering by status.",
2723
+ mimeType: "application/json"
2724
+ },
2725
+ async (uri) => {
2726
+ const statusParam = uri.searchParams.get("status");
2727
+ let statusFilter;
2728
+ if (statusParam) {
2729
+ const validation = z.nativeEnum(PipelineJobStatus).safeParse(statusParam);
2730
+ if (validation.success) {
2731
+ statusFilter = validation.data;
2732
+ } else {
2733
+ logger.warn(`⚠️ Invalid status parameter received: ${statusParam}`);
2734
+ }
1929
2735
  }
1930
- }
1931
- const result = await tools.listJobs.execute({ status: statusFilter });
1932
- return {
1933
- contents: result.jobs.map((job) => ({
1934
- uri: new URL(job.id, uri).href,
1935
- mimeType: "application/json",
1936
- text: JSON.stringify({
1937
- id: job.id,
1938
- library: job.library,
1939
- version: job.version,
1940
- status: job.status,
1941
- error: job.error || void 0
1942
- })
1943
- }))
1944
- };
1945
- }
1946
- );
1947
- server.resource(
1948
- "job",
1949
- // A distinct name for this specific resource type
1950
- new ResourceTemplate("docs://jobs/{jobId}", { list: void 0 }),
1951
- {
1952
- description: "Get details for a specific indexing job by ID.",
1953
- mimeType: "application/json"
1954
- },
1955
- async (uri, { jobId }) => {
1956
- if (typeof jobId !== "string" || jobId.length === 0) {
1957
- logger.warn(`⚠️ Invalid jobId received in URI: ${jobId}`);
1958
- return { contents: [] };
1959
- }
1960
- const result = await tools.getJobInfo.execute({ jobId });
1961
- if (!result.job) {
1962
- return { contents: [] };
1963
- }
1964
- return {
1965
- contents: [
1966
- {
1967
- uri: uri.href,
2736
+ const result = await tools.listJobs.execute({ status: statusFilter });
2737
+ return {
2738
+ contents: result.jobs.map((job) => ({
2739
+ uri: new URL(job.id, uri).href,
1968
2740
  mimeType: "application/json",
1969
2741
  text: JSON.stringify({
1970
- id: result.job.id,
1971
- library: result.job.library,
1972
- version: result.job.version,
1973
- status: result.job.status,
1974
- error: result.job.error || void 0
2742
+ id: job.id,
2743
+ library: job.library,
2744
+ version: job.version,
2745
+ status: job.status,
2746
+ error: job.error || void 0
1975
2747
  })
1976
- }
1977
- ]
1978
- };
1979
- }
1980
- );
2748
+ }))
2749
+ };
2750
+ }
2751
+ );
2752
+ server.resource(
2753
+ "job",
2754
+ // A distinct name for this specific resource type
2755
+ new ResourceTemplate("docs://jobs/{jobId}", { list: void 0 }),
2756
+ {
2757
+ description: "Get details for a specific indexing job by ID.",
2758
+ mimeType: "application/json"
2759
+ },
2760
+ async (uri, { jobId }) => {
2761
+ if (typeof jobId !== "string" || jobId.length === 0) {
2762
+ logger.warn(`⚠️ Invalid jobId received in URI: ${jobId}`);
2763
+ return { contents: [] };
2764
+ }
2765
+ const result = await tools.getJobInfo.execute({ jobId });
2766
+ if (!result.job) {
2767
+ return { contents: [] };
2768
+ }
2769
+ return {
2770
+ contents: [
2771
+ {
2772
+ uri: uri.href,
2773
+ mimeType: "application/json",
2774
+ text: JSON.stringify({
2775
+ id: result.job.id,
2776
+ library: result.job.library,
2777
+ version: result.job.version,
2778
+ status: result.job.status,
2779
+ error: result.job.error || void 0
2780
+ })
2781
+ }
2782
+ ]
2783
+ };
2784
+ }
2785
+ );
2786
+ }
1981
2787
  return server;
1982
2788
  }
1983
2789
  class FileFetcher {
@@ -1992,7 +2798,7 @@ class FileFetcher {
1992
2798
  const rawPath = source.replace("file://", "");
1993
2799
  const filePath = decodeURIComponent(rawPath);
1994
2800
  try {
1995
- const content = await fs.readFile(filePath);
2801
+ const content = await fs$1.readFile(filePath);
1996
2802
  const ext = path.extname(filePath).toLowerCase();
1997
2803
  const mimeType = mime.lookup(ext) || "application/octet-stream";
1998
2804
  return {
@@ -2083,9 +2889,49 @@ class HttpFetcher {
2083
2889
  return new Promise((resolve) => setTimeout(resolve, ms));
2084
2890
  }
2085
2891
  async fetch(source, options) {
2892
+ const startTime = performance.now();
2086
2893
  const maxRetries = options?.maxRetries ?? FETCHER_MAX_RETRIES;
2087
2894
  const baseDelay = options?.retryDelay ?? FETCHER_BASE_DELAY;
2088
2895
  const followRedirects = options?.followRedirects ?? true;
2896
+ try {
2897
+ const result = await this.performFetch(
2898
+ source,
2899
+ options,
2900
+ maxRetries,
2901
+ baseDelay,
2902
+ followRedirects
2903
+ );
2904
+ const duration = performance.now() - startTime;
2905
+ analytics.track("http_request_completed", {
2906
+ success: true,
2907
+ hostname: extractHostname(source),
2908
+ protocol: extractProtocol(source),
2909
+ duration_ms: Math.round(duration),
2910
+ content_size_bytes: result.content.length,
2911
+ mime_type: result.mimeType,
2912
+ has_encoding: !!result.encoding,
2913
+ follow_redirects: followRedirects,
2914
+ had_redirects: result.source !== source
2915
+ });
2916
+ return result;
2917
+ } catch (error) {
2918
+ const duration = performance.now() - startTime;
2919
+ const axiosError = error;
2920
+ const status = axiosError.response?.status;
2921
+ analytics.track("http_request_completed", {
2922
+ success: false,
2923
+ hostname: extractHostname(source),
2924
+ protocol: extractProtocol(source),
2925
+ duration_ms: Math.round(duration),
2926
+ status_code: status,
2927
+ error_type: error instanceof CancellationError ? "cancellation" : error instanceof RedirectError ? "redirect" : error instanceof ScraperError ? "scraper" : "unknown",
2928
+ error_code: axiosError.code,
2929
+ follow_redirects: followRedirects
2930
+ });
2931
+ throw error;
2932
+ }
2933
+ }
2934
+ async performFetch(source, options, maxRetries = FETCHER_MAX_RETRIES, baseDelay = FETCHER_BASE_DELAY, followRedirects = true) {
2089
2935
  for (let attempt = 0; attempt <= maxRetries; attempt++) {
2090
2936
  try {
2091
2937
  const fingerprint = this.fingerprintGenerator.generateHeaders();
@@ -2185,20 +3031,35 @@ async function initializeTools(docService, pipeline) {
2185
3031
  };
2186
3032
  return tools;
2187
3033
  }
2188
- async function registerMcpService(server, docService, pipeline) {
3034
+ async function registerMcpService(server, docService, pipeline, readOnly = false, authManager) {
2189
3035
  const mcpTools = await initializeTools(docService, pipeline);
2190
- const mcpServer = createMcpServerInstance(mcpTools);
3036
+ const mcpServer = createMcpServerInstance(mcpTools, readOnly);
3037
+ const authMiddleware = authManager ? createAuthMiddleware(authManager) : null;
2191
3038
  const sseTransports = {};
2192
3039
  server.route({
2193
3040
  method: "GET",
2194
3041
  url: "/sse",
3042
+ preHandler: authMiddleware ? [authMiddleware] : void 0,
2195
3043
  handler: async (_request, reply) => {
2196
3044
  try {
2197
3045
  const transport = new SSEServerTransport("/messages", reply.raw);
2198
3046
  sseTransports[transport.sessionId] = transport;
3047
+ if (analytics.isEnabled()) {
3048
+ const session = createMcpSession({
3049
+ protocol: "http",
3050
+ transport: "sse",
3051
+ authEnabled: !!authManager,
3052
+ readOnly,
3053
+ servicesEnabled: ["mcp"]
3054
+ });
3055
+ analytics.startSession(session);
3056
+ }
2199
3057
  reply.raw.on("close", () => {
2200
3058
  delete sseTransports[transport.sessionId];
2201
3059
  transport.close();
3060
+ if (analytics.isEnabled()) {
3061
+ analytics.endSession();
3062
+ }
2202
3063
  });
2203
3064
  await mcpServer.connect(transport);
2204
3065
  } catch (error) {
@@ -2233,16 +3094,30 @@ async function registerMcpService(server, docService, pipeline) {
2233
3094
  server.route({
2234
3095
  method: "POST",
2235
3096
  url: "/mcp",
3097
+ preHandler: authMiddleware ? [authMiddleware] : void 0,
2236
3098
  handler: async (request, reply) => {
2237
3099
  try {
2238
- const requestServer = createMcpServerInstance(mcpTools);
3100
+ const requestServer = createMcpServerInstance(mcpTools, readOnly);
2239
3101
  const requestTransport = new StreamableHTTPServerTransport({
2240
3102
  sessionIdGenerator: void 0
2241
3103
  });
3104
+ if (analytics.isEnabled()) {
3105
+ const session = createMcpSession({
3106
+ protocol: "http",
3107
+ transport: "streamable",
3108
+ authEnabled: !!authManager,
3109
+ readOnly,
3110
+ servicesEnabled: ["mcp"]
3111
+ });
3112
+ analytics.startSession(session);
3113
+ }
2242
3114
  reply.raw.on("close", () => {
2243
3115
  logger.debug("Streamable HTTP request closed");
2244
3116
  requestTransport.close();
2245
3117
  requestServer.close();
3118
+ if (analytics.isEnabled()) {
3119
+ analytics.endSession();
3120
+ }
2246
3121
  });
2247
3122
  await requestServer.connect(requestTransport);
2248
3123
  await requestTransport.handleRequest(request.raw, reply.raw, request.body);
@@ -3066,410 +3941,475 @@ const Tooltip = ({ text, position = "top" }) => {
3066
3941
  }
3067
3942
  );
3068
3943
  };
3069
- const ScrapeFormContent = () => /* @__PURE__ */ jsxs("div", { class: "mt-4 p-4 bg-white dark:bg-gray-800 rounded-lg shadow border border-gray-300 dark:border-gray-600", children: [
3070
- /* @__PURE__ */ jsx("h3", { class: "text-xl font-semibold text-gray-900 dark:text-white mb-2", children: "Queue New Scrape Job" }),
3071
- /* @__PURE__ */ jsxs(
3072
- "form",
3073
- {
3074
- "hx-post": "/web/jobs/scrape",
3075
- "hx-target": "#job-response",
3076
- "hx-swap": "innerHTML",
3077
- class: "space-y-2",
3078
- "x-data": "{\n url: '',\n hasPath: false,\n headers: [],\n checkUrlPath() {\n try {\n const url = new URL(this.url);\n this.hasPath = url.pathname !== '/' && url.pathname !== '';\n } catch (e) {\n this.hasPath = false;\n }\n }\n }",
3079
- children: [
3080
- /* @__PURE__ */ jsxs("div", { children: [
3081
- /* @__PURE__ */ jsxs("div", { class: "flex items-center", children: [
3082
- /* @__PURE__ */ jsx(
3083
- "label",
3084
- {
3085
- for: "url",
3086
- class: "block text-sm font-medium text-gray-700 dark:text-gray-300",
3087
- children: "URL"
3088
- }
3089
- ),
3090
- /* @__PURE__ */ jsx(
3091
- Tooltip,
3092
- {
3093
- text: /* @__PURE__ */ jsxs("div", { children: [
3094
- /* @__PURE__ */ jsx("p", { children: "Enter the URL of the documentation you want to scrape." }),
3095
- /* @__PURE__ */ jsxs("p", { class: "mt-2", children: [
3096
- "For local files/folders, you must use the ",
3097
- /* @__PURE__ */ jsx("code", { children: "file://" }),
3098
- " ",
3099
- "prefix and ensure the path is accessible to the server."
3100
- ] }),
3101
- /* @__PURE__ */ jsxs("p", { class: "mt-2", children: [
3102
- "If running in Docker, ",
3103
- /* @__PURE__ */ jsx("b", { children: "mount the folder" }),
3104
- " (see README for details)."
3105
- ] })
3106
- ] })
3107
- }
3108
- )
3109
- ] }),
3110
- /* @__PURE__ */ jsx(
3111
- "input",
3112
- {
3113
- type: "url",
3114
- name: "url",
3115
- id: "url",
3116
- required: true,
3117
- "x-model": "url",
3118
- "x-on:input": "checkUrlPath",
3119
- "x-on:paste": "$nextTick(() => checkUrlPath())",
3120
- class: "mt-0.5 block w-full px-2 py-1 border border-gray-300 dark:border-gray-600 rounded-md shadow-sm focus:outline-none focus:ring-indigo-500 focus:border-indigo-500 sm:text-sm bg-white dark:bg-gray-700 text-gray-900 dark:text-white"
3121
- }
3122
- ),
3123
- /* @__PURE__ */ jsx(
3124
- "div",
3125
- {
3126
- "x-show": "hasPath && !(url.startsWith('file://'))",
3127
- "x-cloak": true,
3128
- "x-transition:enter": "transition ease-out duration-300",
3129
- "x-transition:enter-start": "opacity-0 transform -translate-y-2",
3130
- "x-transition:enter-end": "opacity-100 transform translate-y-0",
3131
- class: "mt-2",
3132
- children: /* @__PURE__ */ jsx(
3133
- Alert,
3944
+ const ScrapeFormContent = ({ defaultExcludePatterns }) => {
3945
+ const defaultExcludePatternsText = defaultExcludePatterns?.join("\n") || "";
3946
+ return /* @__PURE__ */ jsxs("div", { class: "mt-4 p-4 bg-white dark:bg-gray-800 rounded-lg shadow border border-gray-300 dark:border-gray-600", children: [
3947
+ /* @__PURE__ */ jsx("h3", { class: "text-xl font-semibold text-gray-900 dark:text-white mb-2", children: "Queue New Scrape Job" }),
3948
+ /* @__PURE__ */ jsxs(
3949
+ "form",
3950
+ {
3951
+ "hx-post": "/web/jobs/scrape",
3952
+ "hx-target": "#job-response",
3953
+ "hx-swap": "innerHTML",
3954
+ class: "space-y-2",
3955
+ "x-data": "{\n url: '',\n hasPath: false,\n headers: [],\n checkUrlPath() {\n try {\n const url = new URL(this.url);\n this.hasPath = url.pathname !== '/' && url.pathname !== '';\n } catch (e) {\n this.hasPath = false;\n }\n }\n }",
3956
+ children: [
3957
+ /* @__PURE__ */ jsxs("div", { children: [
3958
+ /* @__PURE__ */ jsxs("div", { class: "flex items-center", children: [
3959
+ /* @__PURE__ */ jsx(
3960
+ "label",
3961
+ {
3962
+ for: "url",
3963
+ class: "block text-sm font-medium text-gray-700 dark:text-gray-300",
3964
+ children: "URL"
3965
+ }
3966
+ ),
3967
+ /* @__PURE__ */ jsx(
3968
+ Tooltip,
3134
3969
  {
3135
- type: "info",
3136
- message: "By default, only subpages under the given URL will be scraped. To scrape the whole website, adjust the 'Scope' option in Advanced Options."
3970
+ text: /* @__PURE__ */ jsxs("div", { children: [
3971
+ /* @__PURE__ */ jsx("p", { children: "Enter the URL of the documentation you want to scrape." }),
3972
+ /* @__PURE__ */ jsxs("p", { class: "mt-2", children: [
3973
+ "For local files/folders, you must use the ",
3974
+ /* @__PURE__ */ jsx("code", { children: "file://" }),
3975
+ " ",
3976
+ "prefix and ensure the path is accessible to the server."
3977
+ ] }),
3978
+ /* @__PURE__ */ jsxs("p", { class: "mt-2", children: [
3979
+ "If running in Docker, ",
3980
+ /* @__PURE__ */ jsx("b", { children: "mount the folder" }),
3981
+ " (see README for details)."
3982
+ ] })
3983
+ ] })
3137
3984
  }
3138
3985
  )
3139
- }
3140
- )
3141
- ] }),
3142
- /* @__PURE__ */ jsxs("div", { children: [
3143
- /* @__PURE__ */ jsxs("div", { class: "flex items-center", children: [
3986
+ ] }),
3144
3987
  /* @__PURE__ */ jsx(
3145
- "label",
3988
+ "input",
3146
3989
  {
3147
- for: "library",
3148
- class: "block text-sm font-medium text-gray-700 dark:text-gray-300",
3149
- children: "Library Name"
3990
+ type: "url",
3991
+ name: "url",
3992
+ id: "url",
3993
+ required: true,
3994
+ "x-model": "url",
3995
+ "x-on:input": "checkUrlPath",
3996
+ "x-on:paste": "$nextTick(() => checkUrlPath())",
3997
+ class: "mt-0.5 block w-full px-2 py-1 border border-gray-300 dark:border-gray-600 rounded-md shadow-sm focus:outline-none focus:ring-indigo-500 focus:border-indigo-500 sm:text-sm bg-white dark:bg-gray-700 text-gray-900 dark:text-white"
3150
3998
  }
3151
3999
  ),
3152
- /* @__PURE__ */ jsx(Tooltip, { text: "The name of the library you're documenting. This will be used when searching." })
3153
- ] }),
3154
- /* @__PURE__ */ jsx(
3155
- "input",
3156
- {
3157
- type: "text",
3158
- name: "library",
3159
- id: "library",
3160
- required: true,
3161
- class: "mt-0.5 block w-full px-2 py-1 border border-gray-300 dark:border-gray-600 rounded-md shadow-sm focus:outline-none focus:ring-indigo-500 focus:border-indigo-500 sm:text-sm bg-white dark:bg-gray-700 text-gray-900 dark:text-white"
3162
- }
3163
- )
3164
- ] }),
3165
- /* @__PURE__ */ jsxs("div", { children: [
3166
- /* @__PURE__ */ jsxs("div", { class: "flex items-center", children: [
3167
4000
  /* @__PURE__ */ jsx(
3168
- "label",
4001
+ "div",
3169
4002
  {
3170
- for: "version",
3171
- class: "block text-sm font-medium text-gray-700 dark:text-gray-300",
3172
- children: "Version (optional)"
3173
- }
3174
- ),
3175
- /* @__PURE__ */ jsx(Tooltip, { text: "Specify the version of the library documentation you're indexing. This allows for version-specific searches." })
3176
- ] }),
3177
- /* @__PURE__ */ jsx(
3178
- "input",
3179
- {
3180
- type: "text",
3181
- name: "version",
3182
- id: "version",
3183
- class: "mt-0.5 block w-full max-w-sm px-2 py-1 border border-gray-300 dark:border-gray-600 rounded-md shadow-sm focus:outline-none focus:ring-indigo-500 focus:border-indigo-500 sm:text-sm bg-white dark:bg-gray-700 text-gray-900 dark:text-white"
3184
- }
3185
- )
3186
- ] }),
3187
- /* @__PURE__ */ jsxs("details", { class: "bg-gray-50 dark:bg-gray-900 p-2 rounded-md", children: [
3188
- /* @__PURE__ */ jsx("summary", { class: "cursor-pointer text-sm font-medium text-gray-600 dark:text-gray-400", children: "Advanced Options" }),
3189
- /* @__PURE__ */ jsxs("div", { class: "mt-2 space-y-2", "x-data": "{ headers: [] }", children: [
3190
- /* @__PURE__ */ jsxs("div", { children: [
3191
- /* @__PURE__ */ jsxs("div", { class: "flex items-center", children: [
3192
- /* @__PURE__ */ jsx(
3193
- "label",
4003
+ "x-show": "hasPath && !(url.startsWith('file://'))",
4004
+ "x-cloak": true,
4005
+ "x-transition:enter": "transition ease-out duration-300",
4006
+ "x-transition:enter-start": "opacity-0 transform -translate-y-2",
4007
+ "x-transition:enter-end": "opacity-100 transform translate-y-0",
4008
+ class: "mt-2",
4009
+ children: /* @__PURE__ */ jsx(
4010
+ Alert,
3194
4011
  {
3195
- for: "maxPages",
3196
- class: "block text-sm font-medium text-gray-700 dark:text-gray-300",
3197
- children: "Max Pages"
4012
+ type: "info",
4013
+ message: "By default, only subpages under the given URL will be scraped. To scrape the whole website, adjust the 'Scope' option in Advanced Options."
3198
4014
  }
3199
- ),
3200
- /* @__PURE__ */ jsx(Tooltip, { text: "The maximum number of pages to scrape. Default is 1000. Setting this too high may result in longer processing times." })
3201
- ] }),
4015
+ )
4016
+ }
4017
+ )
4018
+ ] }),
4019
+ /* @__PURE__ */ jsxs("div", { children: [
4020
+ /* @__PURE__ */ jsxs("div", { class: "flex items-center", children: [
3202
4021
  /* @__PURE__ */ jsx(
3203
- "input",
4022
+ "label",
3204
4023
  {
3205
- type: "number",
3206
- name: "maxPages",
3207
- id: "maxPages",
3208
- min: "1",
3209
- placeholder: "1000",
3210
- class: "mt-0.5 block w-full max-w-sm px-2 py-1 border border-gray-300 dark:border-gray-600 rounded-md shadow-sm focus:outline-none focus:ring-indigo-500 focus:border-indigo-500 sm:text-sm bg-white dark:bg-gray-700 text-gray-900 dark:text-white"
4024
+ for: "library",
4025
+ class: "block text-sm font-medium text-gray-700 dark:text-gray-300",
4026
+ children: "Library Name"
3211
4027
  }
3212
- )
4028
+ ),
4029
+ /* @__PURE__ */ jsx(Tooltip, { text: "The name of the library you're documenting. This will be used when searching." })
3213
4030
  ] }),
3214
- /* @__PURE__ */ jsxs("div", { children: [
3215
- /* @__PURE__ */ jsxs("div", { class: "flex items-center", children: [
3216
- /* @__PURE__ */ jsx(
3217
- "label",
3218
- {
3219
- for: "maxDepth",
3220
- class: "block text-sm font-medium text-gray-700 dark:text-gray-300",
3221
- children: "Max Depth"
3222
- }
3223
- ),
3224
- /* @__PURE__ */ jsx(Tooltip, { text: "How many links deep the scraper should follow. Default is 3. Higher values capture more content but increase processing time." })
3225
- ] }),
4031
+ /* @__PURE__ */ jsx(
4032
+ "input",
4033
+ {
4034
+ type: "text",
4035
+ name: "library",
4036
+ id: "library",
4037
+ required: true,
4038
+ class: "mt-0.5 block w-full px-2 py-1 border border-gray-300 dark:border-gray-600 rounded-md shadow-sm focus:outline-none focus:ring-indigo-500 focus:border-indigo-500 sm:text-sm bg-white dark:bg-gray-700 text-gray-900 dark:text-white"
4039
+ }
4040
+ )
4041
+ ] }),
4042
+ /* @__PURE__ */ jsxs("div", { children: [
4043
+ /* @__PURE__ */ jsxs("div", { class: "flex items-center", children: [
3226
4044
  /* @__PURE__ */ jsx(
3227
- "input",
4045
+ "label",
3228
4046
  {
3229
- type: "number",
3230
- name: "maxDepth",
3231
- id: "maxDepth",
3232
- min: "0",
3233
- placeholder: "3",
3234
- class: "mt-0.5 block w-full max-w-sm px-2 py-1 border border-gray-300 dark:border-gray-600 rounded-md shadow-sm focus:outline-none focus:ring-indigo-500 focus:border-indigo-500 sm:text-sm bg-white dark:bg-gray-700 text-gray-900 dark:text-white"
4047
+ for: "version",
4048
+ class: "block text-sm font-medium text-gray-700 dark:text-gray-300",
4049
+ children: "Version (optional)"
3235
4050
  }
3236
- )
4051
+ ),
4052
+ /* @__PURE__ */ jsx(Tooltip, { text: "Specify the version of the library documentation you're indexing. This allows for version-specific searches." })
3237
4053
  ] }),
3238
- /* @__PURE__ */ jsxs("div", { children: [
3239
- /* @__PURE__ */ jsxs("div", { class: "flex items-center", children: [
3240
- /* @__PURE__ */ jsx(
3241
- "label",
3242
- {
3243
- for: "scope",
3244
- class: "block text-sm font-medium text-gray-700 dark:text-gray-300",
3245
- children: "Scope"
3246
- }
3247
- ),
4054
+ /* @__PURE__ */ jsx(
4055
+ "input",
4056
+ {
4057
+ type: "text",
4058
+ name: "version",
4059
+ id: "version",
4060
+ class: "mt-0.5 block w-full max-w-sm px-2 py-1 border border-gray-300 dark:border-gray-600 rounded-md shadow-sm focus:outline-none focus:ring-indigo-500 focus:border-indigo-500 sm:text-sm bg-white dark:bg-gray-700 text-gray-900 dark:text-white"
4061
+ }
4062
+ )
4063
+ ] }),
4064
+ /* @__PURE__ */ jsxs("details", { class: "bg-gray-50 dark:bg-gray-900 p-2 rounded-md", children: [
4065
+ /* @__PURE__ */ jsx("summary", { class: "cursor-pointer text-sm font-medium text-gray-600 dark:text-gray-400", children: "Advanced Options" }),
4066
+ /* @__PURE__ */ jsxs("div", { class: "mt-2 space-y-2", "x-data": "{ headers: [] }", children: [
4067
+ /* @__PURE__ */ jsxs("div", { children: [
4068
+ /* @__PURE__ */ jsxs("div", { class: "flex items-center", children: [
4069
+ /* @__PURE__ */ jsx(
4070
+ "label",
4071
+ {
4072
+ for: "maxPages",
4073
+ class: "block text-sm font-medium text-gray-700 dark:text-gray-300",
4074
+ children: "Max Pages"
4075
+ }
4076
+ ),
4077
+ /* @__PURE__ */ jsx(Tooltip, { text: "The maximum number of pages to scrape. Default is 1000. Setting this too high may result in longer processing times." })
4078
+ ] }),
3248
4079
  /* @__PURE__ */ jsx(
3249
- Tooltip,
4080
+ "input",
3250
4081
  {
3251
- text: /* @__PURE__ */ jsxs("div", { children: [
3252
- "Controls which pages are scraped:",
3253
- /* @__PURE__ */ jsxs("ul", { class: "list-disc pl-5", children: [
3254
- /* @__PURE__ */ jsx("li", { children: "'Subpages' only scrapes under the given URL path," }),
3255
- /* @__PURE__ */ jsx("li", { children: "'Hostname' scrapes all content on the same host (e.g., all of docs.example.com)," }),
3256
- /* @__PURE__ */ jsx("li", { children: "'Domain' scrapes all content on the domain and its subdomains (e.g., all of example.com)." })
3257
- ] })
3258
- ] })
4082
+ type: "number",
4083
+ name: "maxPages",
4084
+ id: "maxPages",
4085
+ min: "1",
4086
+ placeholder: "1000",
4087
+ class: "mt-0.5 block w-full max-w-sm px-2 py-1 border border-gray-300 dark:border-gray-600 rounded-md shadow-sm focus:outline-none focus:ring-indigo-500 focus:border-indigo-500 sm:text-sm bg-white dark:bg-gray-700 text-gray-900 dark:text-white"
3259
4088
  }
3260
4089
  )
3261
4090
  ] }),
3262
- /* @__PURE__ */ jsxs(
3263
- "select",
3264
- {
3265
- name: "scope",
3266
- id: "scope",
3267
- class: "mt-0.5 block w-full max-w-sm pl-2 pr-10 py-1 text-base border border-gray-300 dark:border-gray-600 focus:outline-none focus:ring-indigo-500 focus:border-indigo-500 sm:text-sm rounded-md bg-white dark:bg-gray-700 text-gray-900 dark:text-white",
3268
- children: [
3269
- /* @__PURE__ */ jsx("option", { value: "subpages", selected: true, children: "Subpages (Default)" }),
3270
- /* @__PURE__ */ jsx("option", { value: "hostname", children: "Hostname" }),
3271
- /* @__PURE__ */ jsx("option", { value: "domain", children: "Domain" })
3272
- ]
3273
- }
3274
- )
3275
- ] }),
3276
- /* @__PURE__ */ jsxs("div", { children: [
3277
- /* @__PURE__ */ jsxs("div", { class: "flex items-center", children: [
3278
- /* @__PURE__ */ jsx(
3279
- "label",
3280
- {
3281
- for: "includePatterns",
3282
- class: "block text-sm font-medium text-gray-700 dark:text-gray-300",
3283
- children: "Include Patterns"
3284
- }
3285
- ),
3286
- /* @__PURE__ */ jsx(Tooltip, { text: "Glob or regex patterns for URLs to include. One per line or comma-separated. Regex patterns must be wrapped in slashes, e.g. /pattern/." })
3287
- ] }),
3288
- /* @__PURE__ */ jsx(
3289
- "textarea",
3290
- {
3291
- name: "includePatterns",
3292
- id: "includePatterns",
3293
- rows: "2",
3294
- placeholder: "e.g. docs/* or /api\\/v1.*/",
3295
- class: "mt-0.5 block w-full max-w-sm px-2 py-1 border border-gray-300 dark:border-gray-600 rounded-md shadow-sm focus:outline-none focus:ring-indigo-500 focus:border-indigo-500 sm:text-sm bg-white dark:bg-gray-700 text-gray-900 dark:text-white"
3296
- }
3297
- )
3298
- ] }),
3299
- /* @__PURE__ */ jsxs("div", { children: [
3300
- /* @__PURE__ */ jsxs("div", { class: "flex items-center", children: [
4091
+ /* @__PURE__ */ jsxs("div", { children: [
4092
+ /* @__PURE__ */ jsxs("div", { class: "flex items-center", children: [
4093
+ /* @__PURE__ */ jsx(
4094
+ "label",
4095
+ {
4096
+ for: "maxDepth",
4097
+ class: "block text-sm font-medium text-gray-700 dark:text-gray-300",
4098
+ children: "Max Depth"
4099
+ }
4100
+ ),
4101
+ /* @__PURE__ */ jsx(Tooltip, { text: "How many links deep the scraper should follow. Default is 3. Higher values capture more content but increase processing time." })
4102
+ ] }),
3301
4103
  /* @__PURE__ */ jsx(
3302
- "label",
4104
+ "input",
3303
4105
  {
3304
- for: "excludePatterns",
3305
- class: "block text-sm font-medium text-gray-700 dark:text-gray-300",
3306
- children: "Exclude Patterns"
4106
+ type: "number",
4107
+ name: "maxDepth",
4108
+ id: "maxDepth",
4109
+ min: "0",
4110
+ placeholder: "3",
4111
+ class: "mt-0.5 block w-full max-w-sm px-2 py-1 border border-gray-300 dark:border-gray-600 rounded-md shadow-sm focus:outline-none focus:ring-indigo-500 focus:border-indigo-500 sm:text-sm bg-white dark:bg-gray-700 text-gray-900 dark:text-white"
3307
4112
  }
3308
- ),
3309
- /* @__PURE__ */ jsx(Tooltip, { text: "Glob or regex patterns for URLs to exclude. One per line or comma-separated. Exclude takes precedence over include. Regex patterns must be wrapped in slashes, e.g. /pattern/." })
4113
+ )
3310
4114
  ] }),
3311
- /* @__PURE__ */ jsx(
3312
- "textarea",
3313
- {
3314
- name: "excludePatterns",
3315
- id: "excludePatterns",
3316
- rows: "2",
3317
- placeholder: "e.g. private/* or /internal/",
3318
- class: "mt-0.5 block w-full max-w-sm px-2 py-1 border border-gray-300 dark:border-gray-600 rounded-md shadow-sm focus:outline-none focus:ring-indigo-500 focus:border-indigo-500 sm:text-sm bg-white dark:bg-gray-700 text-gray-900 dark:text-white"
3319
- }
3320
- )
3321
- ] }),
3322
- /* @__PURE__ */ jsxs("div", { children: [
3323
- /* @__PURE__ */ jsxs("div", { class: "flex items-center", children: [
3324
- /* @__PURE__ */ jsx(
3325
- "label",
4115
+ /* @__PURE__ */ jsxs("div", { children: [
4116
+ /* @__PURE__ */ jsxs("div", { class: "flex items-center", children: [
4117
+ /* @__PURE__ */ jsx(
4118
+ "label",
4119
+ {
4120
+ for: "scope",
4121
+ class: "block text-sm font-medium text-gray-700 dark:text-gray-300",
4122
+ children: "Scope"
4123
+ }
4124
+ ),
4125
+ /* @__PURE__ */ jsx(
4126
+ Tooltip,
4127
+ {
4128
+ text: /* @__PURE__ */ jsxs("div", { children: [
4129
+ "Controls which pages are scraped:",
4130
+ /* @__PURE__ */ jsxs("ul", { class: "list-disc pl-5", children: [
4131
+ /* @__PURE__ */ jsx("li", { children: "'Subpages' only scrapes under the given URL path," }),
4132
+ /* @__PURE__ */ jsx("li", { children: "'Hostname' scrapes all content on the same host (e.g., all of docs.example.com)," }),
4133
+ /* @__PURE__ */ jsx("li", { children: "'Domain' scrapes all content on the domain and its subdomains (e.g., all of example.com)." })
4134
+ ] })
4135
+ ] })
4136
+ }
4137
+ )
4138
+ ] }),
4139
+ /* @__PURE__ */ jsxs(
4140
+ "select",
3326
4141
  {
3327
- for: "scrapeMode",
3328
- class: "block text-sm font-medium text-gray-700 dark:text-gray-300",
3329
- children: "Scrape Mode"
4142
+ name: "scope",
4143
+ id: "scope",
4144
+ class: "mt-0.5 block w-full max-w-sm pl-2 pr-10 py-1 text-base border border-gray-300 dark:border-gray-600 focus:outline-none focus:ring-indigo-500 focus:border-indigo-500 sm:text-sm rounded-md bg-white dark:bg-gray-700 text-gray-900 dark:text-white",
4145
+ children: [
4146
+ /* @__PURE__ */ jsx("option", { value: "subpages", selected: true, children: "Subpages (Default)" }),
4147
+ /* @__PURE__ */ jsx("option", { value: "hostname", children: "Hostname" }),
4148
+ /* @__PURE__ */ jsx("option", { value: "domain", children: "Domain" })
4149
+ ]
3330
4150
  }
3331
- ),
4151
+ )
4152
+ ] }),
4153
+ /* @__PURE__ */ jsxs("div", { children: [
4154
+ /* @__PURE__ */ jsxs("div", { class: "flex items-center", children: [
4155
+ /* @__PURE__ */ jsx(
4156
+ "label",
4157
+ {
4158
+ for: "includePatterns",
4159
+ class: "block text-sm font-medium text-gray-700 dark:text-gray-300",
4160
+ children: "Include Patterns"
4161
+ }
4162
+ ),
4163
+ /* @__PURE__ */ jsx(Tooltip, { text: "Glob or regex patterns for URLs to include. One per line or comma-separated. Regex patterns must be wrapped in slashes, e.g. /pattern/." })
4164
+ ] }),
3332
4165
  /* @__PURE__ */ jsx(
3333
- Tooltip,
4166
+ "textarea",
3334
4167
  {
3335
- text: /* @__PURE__ */ jsx("div", { children: /* @__PURE__ */ jsxs("ul", { class: "list-disc pl-5", children: [
3336
- /* @__PURE__ */ jsx("li", { children: "'Auto' automatically selects the best method," }),
3337
- /* @__PURE__ */ jsx("li", { children: "'Fetch' uses simple HTTP requests (faster but may miss dynamic content)," }),
3338
- /* @__PURE__ */ jsx("li", { children: "'Playwright' uses a headless browser (slower but better for JS-heavy sites)." })
3339
- ] }) })
4168
+ name: "includePatterns",
4169
+ id: "includePatterns",
4170
+ rows: "2",
4171
+ placeholder: "e.g. docs/* or /api\\/v1.*/",
4172
+ class: "mt-0.5 block w-full max-w-sm px-2 py-1 border border-gray-300 dark:border-gray-600 rounded-md shadow-sm focus:outline-none focus:ring-indigo-500 focus:border-indigo-500 sm:text-sm bg-white dark:bg-gray-700 text-gray-900 dark:text-white"
3340
4173
  }
3341
4174
  )
3342
4175
  ] }),
3343
- /* @__PURE__ */ jsxs(
3344
- "select",
3345
- {
3346
- name: "scrapeMode",
3347
- id: "scrapeMode",
3348
- class: "mt-0.5 block w-full max-w-sm pl-2 pr-10 py-1 text-base border border-gray-300 dark:border-gray-600 focus:outline-none focus:ring-indigo-500 focus:border-indigo-500 sm:text-sm rounded-md bg-white dark:bg-gray-700 text-gray-900 dark:text-white",
3349
- children: [
3350
- /* @__PURE__ */ jsx("option", { value: ScrapeMode.Auto, selected: true, children: "Auto (Default)" }),
3351
- /* @__PURE__ */ jsx("option", { value: ScrapeMode.Fetch, children: "Fetch" }),
3352
- /* @__PURE__ */ jsx("option", { value: ScrapeMode.Playwright, children: "Playwright" })
3353
- ]
3354
- }
3355
- )
3356
- ] }),
3357
- /* @__PURE__ */ jsxs("div", { children: [
3358
- /* @__PURE__ */ jsxs("div", { class: "flex items-center mb-1", children: [
3359
- /* @__PURE__ */ jsx("label", { class: "block text-sm font-medium text-gray-700 dark:text-gray-300", children: "Custom HTTP Headers" }),
3360
- /* @__PURE__ */ jsx(Tooltip, { text: "Add custom HTTP headers (e.g., for authentication). These will be sent with every HTTP request." })
3361
- ] }),
3362
4176
  /* @__PURE__ */ jsxs("div", { children: [
3363
- /* @__PURE__ */ jsx("template", { "x-for": "(header, idx) in headers", children: /* @__PURE__ */ jsxs("div", { class: "flex space-x-2 mb-1", children: [
4177
+ /* @__PURE__ */ jsxs("div", { class: "flex items-center", children: [
3364
4178
  /* @__PURE__ */ jsx(
3365
- "input",
4179
+ "label",
3366
4180
  {
3367
- type: "text",
3368
- class: "w-1/3 px-2 py-1 border border-gray-300 dark:border-gray-600 rounded-md bg-white dark:bg-gray-700 text-gray-900 dark:text-white text-xs",
3369
- placeholder: "Header Name",
3370
- "x-model": "header.name",
3371
- required: true
4181
+ for: "excludePatterns",
4182
+ class: "block text-sm font-medium text-gray-700 dark:text-gray-300",
4183
+ children: "Exclude Patterns"
3372
4184
  }
3373
4185
  ),
3374
- /* @__PURE__ */ jsx("span", { class: "text-gray-500", children: ":" }),
4186
+ /* @__PURE__ */ jsx(Tooltip, { text: "Glob or regex patterns for URLs to exclude. One per line or comma-separated. Exclude takes precedence over include. Regex patterns must be wrapped in slashes, e.g. /pattern/. Edit or clear this field to customize exclusions." })
4187
+ ] }),
4188
+ /* @__PURE__ */ jsx(
4189
+ "textarea",
4190
+ {
4191
+ name: "excludePatterns",
4192
+ id: "excludePatterns",
4193
+ rows: "5",
4194
+ class: "mt-0.5 block w-full max-w-sm px-2 py-1 border border-gray-300 dark:border-gray-600 rounded-md shadow-sm focus:outline-none focus:ring-indigo-500 focus:border-indigo-500 sm:text-sm bg-white dark:bg-gray-700 text-gray-900 dark:text-white font-mono text-xs",
4195
+ children: defaultExcludePatternsText
4196
+ }
4197
+ ),
4198
+ /* @__PURE__ */ jsx("p", { class: "mt-1 text-xs text-gray-500 dark:text-gray-400", children: "Default patterns are pre-filled. Edit to customize or clear to exclude nothing." })
4199
+ ] }),
4200
+ /* @__PURE__ */ jsxs("div", { children: [
4201
+ /* @__PURE__ */ jsxs("div", { class: "flex items-center", children: [
3375
4202
  /* @__PURE__ */ jsx(
3376
- "input",
4203
+ "label",
3377
4204
  {
3378
- type: "text",
3379
- class: "w-1/2 px-2 py-1 border border-gray-300 dark:border-gray-600 rounded-md bg-white dark:bg-gray-700 text-gray-900 dark:text-white text-xs",
3380
- placeholder: "Header Value",
3381
- "x-model": "header.value",
3382
- required: true
4205
+ for: "scrapeMode",
4206
+ class: "block text-sm font-medium text-gray-700 dark:text-gray-300",
4207
+ children: "Scrape Mode"
3383
4208
  }
3384
4209
  ),
3385
4210
  /* @__PURE__ */ jsx(
3386
- "button",
4211
+ Tooltip,
3387
4212
  {
3388
- type: "button",
3389
- class: "text-red-500 hover:text-red-700 text-xs",
3390
- "x-on:click": "headers.splice(idx, 1)",
3391
- children: "Remove"
4213
+ text: /* @__PURE__ */ jsx("div", { children: /* @__PURE__ */ jsxs("ul", { class: "list-disc pl-5", children: [
4214
+ /* @__PURE__ */ jsx("li", { children: "'Auto' automatically selects the best method," }),
4215
+ /* @__PURE__ */ jsx("li", { children: "'Fetch' uses simple HTTP requests (faster but may miss dynamic content)," }),
4216
+ /* @__PURE__ */ jsx("li", { children: "'Playwright' uses a headless browser (slower but better for JS-heavy sites)." })
4217
+ ] }) })
3392
4218
  }
3393
- ),
4219
+ )
4220
+ ] }),
4221
+ /* @__PURE__ */ jsxs(
4222
+ "select",
4223
+ {
4224
+ name: "scrapeMode",
4225
+ id: "scrapeMode",
4226
+ class: "mt-0.5 block w-full max-w-sm pl-2 pr-10 py-1 text-base border border-gray-300 dark:border-gray-600 focus:outline-none focus:ring-indigo-500 focus:border-indigo-500 sm:text-sm rounded-md bg-white dark:bg-gray-700 text-gray-900 dark:text-white",
4227
+ children: [
4228
+ /* @__PURE__ */ jsx("option", { value: ScrapeMode.Auto, selected: true, children: "Auto (Default)" }),
4229
+ /* @__PURE__ */ jsx("option", { value: ScrapeMode.Fetch, children: "Fetch" }),
4230
+ /* @__PURE__ */ jsx("option", { value: ScrapeMode.Playwright, children: "Playwright" })
4231
+ ]
4232
+ }
4233
+ )
4234
+ ] }),
4235
+ /* @__PURE__ */ jsxs("div", { children: [
4236
+ /* @__PURE__ */ jsxs("div", { class: "flex items-center mb-1", children: [
4237
+ /* @__PURE__ */ jsx("label", { class: "block text-sm font-medium text-gray-700 dark:text-gray-300", children: "Custom HTTP Headers" }),
4238
+ /* @__PURE__ */ jsx(Tooltip, { text: "Add custom HTTP headers (e.g., for authentication). These will be sent with every HTTP request." })
4239
+ ] }),
4240
+ /* @__PURE__ */ jsxs("div", { children: [
4241
+ /* @__PURE__ */ jsx("template", { "x-for": "(header, idx) in headers", children: /* @__PURE__ */ jsxs("div", { class: "flex space-x-2 mb-1", children: [
4242
+ /* @__PURE__ */ jsx(
4243
+ "input",
4244
+ {
4245
+ type: "text",
4246
+ class: "w-1/3 px-2 py-1 border border-gray-300 dark:border-gray-600 rounded-md bg-white dark:bg-gray-700 text-gray-900 dark:text-white text-xs",
4247
+ placeholder: "Header Name",
4248
+ "x-model": "header.name",
4249
+ required: true
4250
+ }
4251
+ ),
4252
+ /* @__PURE__ */ jsx("span", { class: "text-gray-500", children: ":" }),
4253
+ /* @__PURE__ */ jsx(
4254
+ "input",
4255
+ {
4256
+ type: "text",
4257
+ class: "w-1/2 px-2 py-1 border border-gray-300 dark:border-gray-600 rounded-md bg-white dark:bg-gray-700 text-gray-900 dark:text-white text-xs",
4258
+ placeholder: "Header Value",
4259
+ "x-model": "header.value",
4260
+ required: true
4261
+ }
4262
+ ),
4263
+ /* @__PURE__ */ jsx(
4264
+ "button",
4265
+ {
4266
+ type: "button",
4267
+ class: "text-red-500 hover:text-red-700 text-xs",
4268
+ "x-on:click": "headers.splice(idx, 1)",
4269
+ children: "Remove"
4270
+ }
4271
+ ),
4272
+ /* @__PURE__ */ jsx(
4273
+ "input",
4274
+ {
4275
+ type: "hidden",
4276
+ name: "header[]",
4277
+ "x-bind:value": "header.name && header.value ? header.name + ':' + header.value : ''"
4278
+ }
4279
+ )
4280
+ ] }) }),
3394
4281
  /* @__PURE__ */ jsx(
3395
- "input",
4282
+ "button",
3396
4283
  {
3397
- type: "hidden",
3398
- name: "header[]",
3399
- "x-bind:value": "header.name && header.value ? header.name + ':' + header.value : ''"
4284
+ type: "button",
4285
+ class: "mt-1 px-2 py-0.5 bg-indigo-100 dark:bg-indigo-900 text-indigo-700 dark:text-indigo-200 rounded text-xs",
4286
+ "x-on:click": "headers.push({ name: '', value: '' })",
4287
+ children: "+ Add Header"
3400
4288
  }
3401
4289
  )
3402
- ] }) }),
4290
+ ] })
4291
+ ] }),
4292
+ /* @__PURE__ */ jsxs("div", { class: "flex items-center", children: [
4293
+ /* @__PURE__ */ jsx(
4294
+ "input",
4295
+ {
4296
+ id: "followRedirects",
4297
+ name: "followRedirects",
4298
+ type: "checkbox",
4299
+ checked: true,
4300
+ class: "h-4 w-4 text-indigo-600 focus:ring-indigo-500 border-gray-300 dark:border-gray-600 rounded bg-white dark:bg-gray-700"
4301
+ }
4302
+ ),
4303
+ /* @__PURE__ */ jsx(
4304
+ "label",
4305
+ {
4306
+ for: "followRedirects",
4307
+ class: "ml-1 block text-sm text-gray-900 dark:text-gray-300",
4308
+ children: "Follow Redirects"
4309
+ }
4310
+ )
4311
+ ] }),
4312
+ /* @__PURE__ */ jsxs("div", { class: "flex items-center", children: [
4313
+ /* @__PURE__ */ jsx(
4314
+ "input",
4315
+ {
4316
+ id: "ignoreErrors",
4317
+ name: "ignoreErrors",
4318
+ type: "checkbox",
4319
+ checked: true,
4320
+ class: "h-4 w-4 text-indigo-600 focus:ring-indigo-500 border-gray-300 dark:border-gray-600 rounded bg-white dark:bg-gray-700"
4321
+ }
4322
+ ),
3403
4323
  /* @__PURE__ */ jsx(
3404
- "button",
4324
+ "label",
3405
4325
  {
3406
- type: "button",
3407
- class: "mt-1 px-2 py-0.5 bg-indigo-100 dark:bg-indigo-900 text-indigo-700 dark:text-indigo-200 rounded text-xs",
3408
- "x-on:click": "headers.push({ name: '', value: '' })",
3409
- children: "+ Add Header"
4326
+ for: "ignoreErrors",
4327
+ class: "ml-1 block text-sm text-gray-900 dark:text-gray-300",
4328
+ children: "Ignore Errors During Scraping"
3410
4329
  }
3411
4330
  )
3412
4331
  ] })
3413
- ] }),
3414
- /* @__PURE__ */ jsxs("div", { class: "flex items-center", children: [
3415
- /* @__PURE__ */ jsx(
3416
- "input",
3417
- {
3418
- id: "followRedirects",
3419
- name: "followRedirects",
3420
- type: "checkbox",
3421
- checked: true,
3422
- class: "h-4 w-4 text-indigo-600 focus:ring-indigo-500 border-gray-300 dark:border-gray-600 rounded bg-white dark:bg-gray-700"
3423
- }
3424
- ),
3425
- /* @__PURE__ */ jsx(
3426
- "label",
3427
- {
3428
- for: "followRedirects",
3429
- class: "ml-1 block text-sm text-gray-900 dark:text-gray-300",
3430
- children: "Follow Redirects"
3431
- }
3432
- )
3433
- ] }),
3434
- /* @__PURE__ */ jsxs("div", { class: "flex items-center", children: [
3435
- /* @__PURE__ */ jsx(
3436
- "input",
3437
- {
3438
- id: "ignoreErrors",
3439
- name: "ignoreErrors",
3440
- type: "checkbox",
3441
- checked: true,
3442
- class: "h-4 w-4 text-indigo-600 focus:ring-indigo-500 border-gray-300 dark:border-gray-600 rounded bg-white dark:bg-gray-700"
3443
- }
3444
- ),
3445
- /* @__PURE__ */ jsx(
3446
- "label",
3447
- {
3448
- for: "ignoreErrors",
3449
- class: "ml-1 block text-sm text-gray-900 dark:text-gray-300",
3450
- children: "Ignore Errors During Scraping"
3451
- }
3452
- )
3453
4332
  ] })
3454
- ] })
3455
- ] }),
3456
- /* @__PURE__ */ jsx("div", { children: /* @__PURE__ */ jsx(
3457
- "button",
3458
- {
3459
- type: "submit",
3460
- class: "w-full flex justify-center py-1.5 px-3 border border-transparent rounded-md shadow-sm text-sm font-medium text-white bg-indigo-600 hover:bg-indigo-700 focus:outline-none focus:ring-2 focus:ring-offset-2 focus:ring-indigo-500",
3461
- children: "Queue Job"
3462
- }
3463
- ) })
3464
- ]
3465
- }
3466
- ),
3467
- /* @__PURE__ */ jsx("div", { id: "job-response", class: "mt-2 text-sm" })
3468
- ] });
3469
- const ScrapeForm = () => /* @__PURE__ */ jsx("div", { id: "scrape-form-container", children: /* @__PURE__ */ jsx(ScrapeFormContent, {}) });
4333
+ ] }),
4334
+ /* @__PURE__ */ jsx("div", { children: /* @__PURE__ */ jsx(
4335
+ "button",
4336
+ {
4337
+ type: "submit",
4338
+ class: "w-full flex justify-center py-1.5 px-3 border border-transparent rounded-md shadow-sm text-sm font-medium text-white bg-indigo-600 hover:bg-indigo-700 focus:outline-none focus:ring-2 focus:ring-offset-2 focus:ring-indigo-500",
4339
+ children: "Queue Job"
4340
+ }
4341
+ ) })
4342
+ ]
4343
+ }
4344
+ ),
4345
+ /* @__PURE__ */ jsx("div", { id: "job-response", class: "mt-2 text-sm" })
4346
+ ] });
4347
+ };
4348
/**
 * Renders the scrape form wrapped in the `#scrape-form-container` element.
 *
 * @param {object} props
 * @param {*} props.defaultExcludePatterns - Passed through unchanged to
 *   `ScrapeFormContent`.
 * @returns The JSX output of the container `div` holding the form content.
 */
const ScrapeForm = (props) => {
  const { defaultExcludePatterns } = props;
  const formContent = /* @__PURE__ */ jsx(ScrapeFormContent, { defaultExcludePatterns });
  return /* @__PURE__ */ jsx("div", { id: "scrape-form-container", children: formContent });
};
4349
/**
 * Glob patterns for individual files that are excluded by default.
 * Each listed file name is matched at any directory depth (`**​/<name>`),
 * in both upper- and lower-case spellings where applicable.
 */
const DEFAULT_FILE_EXCLUSIONS = [
  // Changelogs
  ...["CHANGELOG.md", "changelog.md", "CHANGELOG.mdx", "changelog.mdx"],
  // Licenses
  ...["LICENSE", "LICENSE.md", "license.md"],
  // Codes of conduct
  ...["CODE_OF_CONDUCT.md", "code_of_conduct.md"]
].map((fileName) => `**/${fileName}`);
4363
/**
 * Glob patterns for folders that are excluded by default: archived or
 * deprecated content, non-English `i18n` locale folders, and a handful of
 * common Chinese locale folder names.
 */
const DEFAULT_FOLDER_EXCLUSIONS = [
  // Archive and deprecated content, matched anywhere in the path
  ...[
    "archive",
    "archived",
    "deprecated",
    "legacy",
    "old",
    "outdated",
    "previous",
    "superseded"
  ].map((folder) => `**/${folder}/**`),
  // Specific path that does not follow the general pattern
  "docs/old/**",
  // Non-English locales below an i18n folder (prefix match on the locale code)
  ...[
    "ar",
    "de",
    "es",
    "fr",
    "hi",
    "it",
    "ja",
    "ko",
    "nl",
    "pl",
    "pt",
    "ru",
    "sv",
    "th",
    "tr",
    "vi",
    "zh"
  ].map((locale) => `**/i18n/${locale}*/**`),
  // Common stand-alone locale folders
  ...["zh-cn", "zh-hk", "zh-mo", "zh-sg", "zh-tw"].map((folder) => `**/${folder}/**`)
];
4400
/**
 * Complete default exclusion list: the file patterns followed by the folder
 * patterns, combined into one new array.
 */
const DEFAULT_EXCLUSION_PATTERNS = DEFAULT_FILE_EXCLUSIONS.concat(
  DEFAULT_FOLDER_EXCLUSIONS
);
4404
/**
 * Resolves which exclusion patterns apply.
 *
 * Any value other than `undefined` is returned untouched, so an explicitly
 * supplied empty array is returned as-is; only `undefined` selects
 * `DEFAULT_EXCLUSION_PATTERNS`.
 *
 * @param {*} userPatterns - Caller-supplied patterns, or `undefined`.
 * @returns `userPatterns` when it is not `undefined`, otherwise the defaults.
 */
function getEffectiveExclusionPatterns(userPatterns) {
  return userPatterns === void 0 ? DEFAULT_EXCLUSION_PATTERNS : userPatterns;
}
3470
4410
  function registerNewJobRoutes(server, scrapeTool) {
3471
4411
  server.get("/web/jobs/new", async () => {
3472
- return /* @__PURE__ */ jsx(ScrapeForm, {});
4412
+ return /* @__PURE__ */ jsx(ScrapeForm, { defaultExcludePatterns: DEFAULT_EXCLUSION_PATTERNS });
3473
4413
  });
3474
4414
  server.post(
3475
4415
  "/web/jobs/scrape",
@@ -3540,7 +4480,7 @@ function registerNewJobRoutes(server, scrapeTool) {
3540
4480
  ] })
3541
4481
  }
3542
4482
  ),
3543
- /* @__PURE__ */ jsx("div", { id: "scrape-form-container", "hx-swap-oob": "innerHTML", children: /* @__PURE__ */ jsx(ScrapeFormContent, {}) })
4483
+ /* @__PURE__ */ jsx("div", { id: "scrape-form-container", "hx-swap-oob": "innerHTML", children: /* @__PURE__ */ jsx(ScrapeFormContent, { defaultExcludePatterns: DEFAULT_EXCLUSION_PATTERNS }) })
3544
4484
  ] });
3545
4485
  }
3546
4486
  return /* @__PURE__ */ jsx(Alert, { type: "warning", message: "Job finished unexpectedly quickly." });
@@ -3959,16 +4899,59 @@ async function registerWorkerService(pipeline) {
3959
4899
  pipeline.setCallbacks({
3960
4900
  onJobProgress: async (job, progress) => {
3961
4901
  logger.debug(
3962
- `📊 Job ${job.id} progress: ${progress.pagesScraped}/${progress.totalPages} pages`
4902
+ `Job ${job.id} progress: ${progress.pagesScraped}/${progress.totalPages} pages`
3963
4903
  );
4904
+ analytics.track(TelemetryEvent.PIPELINE_JOB_PROGRESS, {
4905
+ jobId: job.id,
4906
+ // Job IDs are already anonymous
4907
+ library: job.library,
4908
+ pagesScraped: progress.pagesScraped,
4909
+ totalPages: progress.totalPages,
4910
+ totalDiscovered: progress.totalDiscovered,
4911
+ progressPercent: Math.round(progress.pagesScraped / progress.totalPages * 100),
4912
+ currentDepth: progress.depth,
4913
+ maxDepth: progress.maxDepth,
4914
+ discoveryRatio: Math.round(
4915
+ progress.totalDiscovered / progress.totalPages * 100
4916
+ ),
4917
+ // How much we discovered vs limited total
4918
+ queue_efficiency: progress.totalPages > 0 ? Math.round(progress.pagesScraped / progress.totalPages * 100) : 0
4919
+ });
3964
4920
  },
3965
4921
  onJobStatusChange: async (job) => {
3966
- logger.debug(`🔄 Job ${job.id} status changed to: ${job.status}`);
4922
+ logger.debug(`Job ${job.id} status changed to: ${job.status}`);
4923
+ const duration = job.startedAt ? Date.now() - job.startedAt.getTime() : null;
4924
+ const queueWaitTime = job.startedAt && job.createdAt ? job.startedAt.getTime() - job.createdAt.getTime() : null;
4925
+ analytics.track(TelemetryEvent.PIPELINE_JOB_COMPLETED, {
4926
+ jobId: job.id,
4927
+ // Job IDs are already anonymous
4928
+ library: job.library,
4929
+ status: job.status,
4930
+ duration_ms: duration,
4931
+ queue_wait_time_ms: queueWaitTime,
4932
+ pages_processed: job.progressPages || 0,
4933
+ max_pages_configured: job.progressMaxPages || 0,
4934
+ has_version: !!job.version,
4935
+ has_error: !!job.error,
4936
+ throughput_pages_per_second: duration && job.progressPages ? Math.round(job.progressPages / duration * 1e3) : 0
4937
+ });
3967
4938
  },
3968
4939
  onJobError: async (job, error, document) => {
3969
4940
  logger.warn(
3970
4941
  `⚠️ Job ${job.id} error ${document ? `on document ${document.metadata.url}` : ""}: ${error.message}`
3971
4942
  );
4943
+ const errorInfo = sanitizeError(error);
4944
+ analytics.track(TelemetryEvent.ERROR_OCCURRED, {
4945
+ jobId: job.id,
4946
+ // Job IDs are already anonymous
4947
+ library: job.library,
4948
+ errorType: errorInfo.type,
4949
+ errorMessage: errorInfo.message,
4950
+ hasDocument: !!document,
4951
+ stage: document ? "document_processing" : "job_setup",
4952
+ hasStack: errorInfo.hasStack,
4953
+ pages_processed_before_error: job.progressPages || 0
4954
+ });
3972
4955
  }
3973
4956
  });
3974
4957
  await pipeline.start();
@@ -3987,7 +4970,7 @@ function getProjectRoot() {
3987
4970
  let currentDir = path.dirname(currentFilePath);
3988
4971
  while (true) {
3989
4972
  const packageJsonPath = path.join(currentDir, "package.json");
3990
- if (fs$1.existsSync(packageJsonPath)) {
4973
+ if (fs.existsSync(packageJsonPath)) {
3991
4974
  projectRoot = currentDir;
3992
4975
  return projectRoot;
3993
4976
  }
@@ -4010,6 +4993,7 @@ class AppServer {
4010
4993
  }
4011
4994
  server;
4012
4995
  mcpServer = null;
4996
+ authManager = null;
4013
4997
  config;
4014
4998
  /**
4015
4999
  * Validate the server configuration for invalid service combinations.
@@ -4040,15 +5024,48 @@ class AppServer {
4040
5024
  */
4041
5025
  async start() {
4042
5026
  this.validateConfig();
5027
+ if (this.config.telemetry !== false && shouldEnableTelemetry()) {
5028
+ try {
5029
+ telemetryService.startSession({
5030
+ sessionId: crypto.randomUUID(),
5031
+ interface: "web",
5032
+ startTime: /* @__PURE__ */ new Date(),
5033
+ version: process.env.npm_package_version || "unknown",
5034
+ platform: process.platform,
5035
+ servicesEnabled: this.getActiveServicesList(),
5036
+ authEnabled: Boolean(this.config.auth),
5037
+ readOnly: Boolean(this.config.readOnly)
5038
+ });
5039
+ } catch (error) {
5040
+ logger.debug(`Failed to initialize telemetry: ${error}`);
5041
+ }
5042
+ }
4043
5043
  await this.setupServer();
4044
5044
  try {
5045
+ const startupStartTime = performance.now();
4045
5046
  const address = await this.server.listen({
4046
5047
  port: this.config.port,
4047
5048
  host: "0.0.0.0"
4048
5049
  });
5050
+ const startupDuration = performance.now() - startupStartTime;
5051
+ if (analytics.isEnabled()) {
5052
+ analytics.track(TelemetryEvent.APP_STARTED, {
5053
+ startup_success: true,
5054
+ startup_duration_ms: Math.round(startupDuration),
5055
+ listen_address: address,
5056
+ active_services: this.getActiveServicesList()
5057
+ });
5058
+ }
4049
5059
  this.logStartupInfo(address);
4050
5060
  return this.server;
4051
5061
  } catch (error) {
5062
+ if (analytics.isEnabled()) {
5063
+ analytics.track(TelemetryEvent.APP_STARTED, {
5064
+ startup_success: false,
5065
+ error_type: error instanceof Error ? error.constructor.name : "UnknownError",
5066
+ error_message: error instanceof Error ? error.message : String(error)
5067
+ });
5068
+ }
4052
5069
  logger.error(`❌ Failed to start AppServer: ${error}`);
4053
5070
  await this.server.close();
4054
5071
  throw error;
@@ -4059,24 +5076,121 @@ class AppServer {
4059
5076
  */
4060
5077
  async stop() {
4061
5078
  try {
5079
+ if (analytics.isEnabled()) {
5080
+ analytics.track(TelemetryEvent.APP_SHUTDOWN, {
5081
+ graceful: true
5082
+ });
5083
+ }
4062
5084
  if (this.config.enableWorker) {
4063
5085
  await stopWorkerService(this.pipeline);
4064
5086
  }
4065
5087
  if (this.mcpServer) {
4066
5088
  await cleanupMcpService(this.mcpServer);
4067
5089
  }
5090
+ telemetryService.endSession();
5091
+ await telemetryService.shutdown();
4068
5092
  await this.server.close();
4069
5093
  logger.info("🛑 AppServer stopped");
4070
5094
  } catch (error) {
4071
5095
  logger.error(`❌ Failed to stop AppServer gracefully: ${error}`);
5096
+ if (analytics.isEnabled()) {
5097
+ analytics.track(TelemetryEvent.APP_SHUTDOWN, {
5098
+ graceful: false,
5099
+ error: error instanceof Error ? error.constructor.name : "UnknownError"
5100
+ });
5101
+ await telemetryService.shutdown();
5102
+ }
4072
5103
  throw error;
4073
5104
  }
4074
5105
  }
5106
+ /**
5107
+ * Setup global error handling for telemetry
5108
+ */
5109
+ setupErrorHandling() {
5110
+ if (!process.listenerCount("unhandledRejection")) {
5111
+ process.on("unhandledRejection", (reason) => {
5112
+ logger.error(`Unhandled Promise Rejection: ${reason}`);
5113
+ if (analytics.isEnabled()) {
5114
+ analytics.track(TelemetryEvent.ERROR_OCCURRED, {
5115
+ error_type: "UnhandledPromiseRejection",
5116
+ error_category: "system",
5117
+ component: "AppServer",
5118
+ severity: "critical",
5119
+ context: "process_unhandled_rejection"
5120
+ });
5121
+ }
5122
+ });
5123
+ }
5124
+ if (!process.listenerCount("uncaughtException")) {
5125
+ process.on("uncaughtException", (error) => {
5126
+ logger.error(`Uncaught Exception: ${error.message}`);
5127
+ if (analytics.isEnabled()) {
5128
+ analytics.track(TelemetryEvent.ERROR_OCCURRED, {
5129
+ error_type: error.constructor.name,
5130
+ error_category: "system",
5131
+ component: "AppServer",
5132
+ severity: "critical",
5133
+ context: "process_uncaught_exception"
5134
+ });
5135
+ }
5136
+ });
5137
+ }
5138
+ if (typeof this.server.setErrorHandler === "function") {
5139
+ this.server.setErrorHandler(async (error, request, reply) => {
5140
+ if (analytics.isEnabled()) {
5141
+ analytics.track(TelemetryEvent.ERROR_OCCURRED, {
5142
+ error_type: error.constructor.name,
5143
+ error_category: "http",
5144
+ component: "FastifyServer",
5145
+ severity: "high",
5146
+ status_code: error.statusCode || 500,
5147
+ method: request.method,
5148
+ route: request.routeOptions?.url || request.url,
5149
+ context: "http_request_error"
5150
+ });
5151
+ }
5152
+ logger.error(`HTTP Error on ${request.method} ${request.url}: ${error.message}`);
5153
+ const statusCode = error.statusCode || 500;
5154
+ reply.status(statusCode).send({
5155
+ error: "Internal Server Error",
5156
+ statusCode,
5157
+ message: statusCode < 500 ? error.message : "An unexpected error occurred"
5158
+ });
5159
+ });
5160
+ }
5161
+ }
5162
+ /**
5163
+ * Get list of currently active services for telemetry
5164
+ */
5165
+ getActiveServicesList() {
5166
+ const services = [];
5167
+ if (this.config.enableMcpServer) services.push("mcp");
5168
+ if (this.config.enableWebInterface) services.push("web");
5169
+ if (this.config.enableApiServer) services.push("api");
5170
+ if (this.config.enableWorker) services.push("worker");
5171
+ return services;
5172
+ }
4075
5173
  /**
4076
5174
  * Setup the server with plugins and conditionally enabled services.
4077
5175
  */
4078
5176
  async setupServer() {
5177
+ this.setupErrorHandling();
5178
+ if (this.config.auth?.enabled) {
5179
+ await this.initializeAuth();
5180
+ }
4079
5181
  await this.server.register(formBody);
5182
+ if (this.config.auth?.enabled) {
5183
+ this.server.addHook("onRequest", async (request) => {
5184
+ if (request.url.includes("/oauth") || request.url.includes("/auth") || request.url.includes("/register")) {
5185
+ logger.debug(
5186
+ `${request.method} ${request.url} - Headers: ${JSON.stringify(request.headers)}`
5187
+ );
5188
+ }
5189
+ });
5190
+ }
5191
+ if (this.config.auth?.enabled && this.authManager) {
5192
+ await this.setupAuthMetadataEndpoint();
5193
+ }
4080
5194
  if (this.config.enableWebInterface) {
4081
5195
  await this.enableWebInterface();
4082
5196
  }
@@ -4107,7 +5221,9 @@ class AppServer {
4107
5221
  this.mcpServer = await registerMcpService(
4108
5222
  this.server,
4109
5223
  this.docService,
4110
- this.pipeline
5224
+ this.pipeline,
5225
+ this.config.readOnly,
5226
+ this.authManager || void 0
4111
5227
  );
4112
5228
  logger.debug("MCP server service enabled");
4113
5229
  }
@@ -4135,6 +5251,28 @@ class AppServer {
4135
5251
  index: false
4136
5252
  });
4137
5253
  }
5254
+ /**
5255
+ * Initialize OAuth2/OIDC authentication manager.
5256
+ */
5257
+ async initializeAuth() {
5258
+ if (!this.config.auth) {
5259
+ return;
5260
+ }
5261
+ this.authManager = new ProxyAuthManager(this.config.auth);
5262
+ await this.authManager.initialize();
5263
+ logger.debug("Proxy auth manager initialized");
5264
+ }
5265
+ /**
5266
+ * Setup OAuth2 endpoints using ProxyAuthManager.
5267
+ */
5268
+ async setupAuthMetadataEndpoint() {
5269
+ if (!this.authManager) {
5270
+ return;
5271
+ }
5272
+ const baseUrl = new URL(`http://localhost:${this.config.port}`);
5273
+ this.authManager.registerRoutes(this.server, baseUrl);
5274
+ logger.debug("OAuth2 proxy endpoints registered");
5275
+ }
4138
5276
  /**
4139
5277
  * Log startup information showing which services are enabled.
4140
5278
  */
@@ -4165,9 +5303,9 @@ async function startAppServer(docService, pipeline, config) {
4165
5303
  await appServer.start();
4166
5304
  return appServer;
4167
5305
  }
4168
- async function startStdioServer(tools) {
5306
+ async function startStdioServer(tools, readOnly = false) {
4169
5307
  setLogLevel(LogLevel.ERROR);
4170
- const server = createMcpServerInstance(tools);
5308
+ const server = createMcpServerInstance(tools, readOnly);
4171
5309
  const transport = new StdioServerTransport();
4172
5310
  await server.connect(transport);
4173
5311
  logger.info("🤖 MCP server listening on stdio");
@@ -4226,10 +5364,10 @@ async function applyMigrations(db) {
4226
5364
  logger.debug("Checking database migrations...");
4227
5365
  ensureMigrationsTable(db);
4228
5366
  const appliedMigrations = getAppliedMigrations(db);
4229
- if (!fs$1.existsSync(MIGRATIONS_DIR)) {
5367
+ if (!fs.existsSync(MIGRATIONS_DIR)) {
4230
5368
  throw new StoreError("Migrations directory not found");
4231
5369
  }
4232
- const migrationFiles = fs$1.readdirSync(MIGRATIONS_DIR).filter((file) => file.endsWith(".sql")).sort();
5370
+ const migrationFiles = fs.readdirSync(MIGRATIONS_DIR).filter((file) => file.endsWith(".sql")).sort();
4233
5371
  const pendingMigrations = migrationFiles.filter(
4234
5372
  (filename) => !appliedMigrations.has(filename)
4235
5373
  );
@@ -4240,12 +5378,12 @@ async function applyMigrations(db) {
4240
5378
  for (const filename of pendingMigrations) {
4241
5379
  logger.debug(`Applying migration: ${filename}`);
4242
5380
  const filePath = path.join(MIGRATIONS_DIR, filename);
4243
- const sql = fs$1.readFileSync(filePath, "utf8");
5381
+ const sql = fs.readFileSync(filePath, "utf8");
4244
5382
  try {
4245
5383
  db.exec(sql);
4246
5384
  const insertStmt = db.prepare(`INSERT INTO ${MIGRATIONS_TABLE} (id) VALUES (?)`);
4247
5385
  insertStmt.run(filename);
4248
- logger.debug(`✅ Applied migration: ${filename}`);
5386
+ logger.debug(`Applied migration: ${filename}`);
4249
5387
  appliedCount++;
4250
5388
  } catch (error) {
4251
5389
  logger.error(`❌ Failed to apply migration: ${filename} - ${error}`);
@@ -4319,12 +5457,12 @@ async function createDocumentManagement(options = {}) {
4319
5457
  await client.initialize();
4320
5458
  return client;
4321
5459
  }
4322
- const service = new (await import("./DocumentManagementService-BH02TJEe.js")).DocumentManagementService();
5460
+ const service = new (await import("./DocumentManagementService-C1xAzouZ.js")).DocumentManagementService();
4323
5461
  await service.initialize();
4324
5462
  return service;
4325
5463
  }
4326
5464
  async function createLocalDocumentManagement() {
4327
- const service = new (await import("./DocumentManagementService-BH02TJEe.js")).DocumentManagementService();
5465
+ const service = new (await import("./DocumentManagementService-C1xAzouZ.js")).DocumentManagementService();
4328
5466
  await service.initialize();
4329
5467
  return service;
4330
5468
  }
@@ -4494,6 +5632,16 @@ function validateUrl(url) {
4494
5632
  throw new InvalidUrlError(url, error instanceof Error ? error : void 0);
4495
5633
  }
4496
5634
  }
5635
+ function extractPrimaryDomain(hostname) {
5636
+ if (/^\d+\.\d+\.\d+\.\d+$/.test(hostname) || /^[0-9a-fA-F:]+$/.test(hostname)) {
5637
+ return hostname;
5638
+ }
5639
+ if (!hostname.includes(".")) {
5640
+ return hostname;
5641
+ }
5642
+ const domain = psl.get(hostname.toLowerCase());
5643
+ return domain || hostname;
5644
+ }
4497
5645
  function computeBaseDirectory(pathname) {
4498
5646
  if (pathname === "") return "/";
4499
5647
  if (pathname.endsWith("/")) return pathname;
@@ -4515,8 +5663,7 @@ function isInScope(baseUrl, targetUrl, scope) {
4515
5663
  case "hostname":
4516
5664
  return baseUrl.hostname === targetUrl.hostname;
4517
5665
  case "domain": {
4518
- const getDomain = (host) => host.split(".").slice(-2).join(".");
4519
- return getDomain(baseUrl.hostname) === getDomain(targetUrl.hostname);
5666
+ return extractPrimaryDomain(baseUrl.hostname) === extractPrimaryDomain(targetUrl.hostname);
4520
5667
  }
4521
5668
  default:
4522
5669
  return false;
@@ -4563,7 +5710,8 @@ function shouldIncludeUrl(url, includePatterns, excludePatterns) {
4563
5710
  }
4564
5711
  }
4565
5712
  const stripSlash = (patterns) => patterns?.map((p) => p.startsWith("/") ? p.slice(1) : p);
4566
- if (matchesAnyPattern(normalizedPath, excludePatterns) || basename && matchesAnyPattern(basename, stripSlash(excludePatterns)))
5713
+ const effectiveExcludePatterns = getEffectiveExclusionPatterns(excludePatterns);
5714
+ if (matchesAnyPattern(normalizedPath, effectiveExcludePatterns) || basename && matchesAnyPattern(basename, stripSlash(effectiveExcludePatterns)))
4567
5715
  return false;
4568
5716
  if (!includePatterns || includePatterns.length === 0) return true;
4569
5717
  return matchesAnyPattern(normalizedPath, includePatterns) || (basename ? matchesAnyPattern(basename, stripSlash(includePatterns)) : false);
@@ -4889,9 +6037,9 @@ class LocalFileStrategy extends BaseScraperStrategy {
4889
6037
  }
4890
6038
  async processItem(item, options, _progressCallback, _signal) {
4891
6039
  const filePath = decodeURIComponent(item.url.replace(/^file:\/\//, ""));
4892
- const stats = await fs.stat(filePath);
6040
+ const stats = await fs$1.stat(filePath);
4893
6041
  if (stats.isDirectory()) {
4894
- const contents = await fs.readdir(filePath);
6042
+ const contents = await fs$1.readdir(filePath);
4895
6043
  const links = contents.map((name) => `file://${path.join(filePath, name)}`).filter((url) => this.shouldProcessUrl(url, options));
4896
6044
  return { links };
4897
6045
  }
@@ -5722,11 +6870,11 @@ async function createPipelineWithCallbacks(docService, options = {}) {
5722
6870
  pipeline.setCallbacks({
5723
6871
  onJobProgress: async (job, progress) => {
5724
6872
  logger.debug(
5725
- `📊 Job ${job.id} progress: ${progress.pagesScraped}/${progress.totalPages} pages`
6873
+ `Job ${job.id} progress: ${progress.pagesScraped}/${progress.totalPages} pages`
5726
6874
  );
5727
6875
  },
5728
6876
  onJobStatusChange: async (job) => {
5729
- logger.debug(`🔄 Job ${job.id} status changed to: ${job.status}`);
6877
+ logger.debug(`Job ${job.id} status changed to: ${job.status}`);
5730
6878
  },
5731
6879
  onJobError: async (job, error, document) => {
5732
6880
  logger.warn(
@@ -5743,7 +6891,9 @@ function createAppServerConfig(options) {
5743
6891
  enableApiServer: options.enableApiServer ?? false,
5744
6892
  enableWorker: options.enableWorker ?? true,
5745
6893
  port: options.port,
5746
- externalWorkerUrl: options.externalWorkerUrl
6894
+ externalWorkerUrl: options.externalWorkerUrl,
6895
+ readOnly: options.readOnly ?? false,
6896
+ auth: options.auth
5747
6897
  };
5748
6898
  }
5749
6899
  function parseHeaders(headerOptions) {
@@ -5764,8 +6914,84 @@ const CLI_DEFAULTS = {
5764
6914
  PROTOCOL: DEFAULT_PROTOCOL,
5765
6915
  HTTP_PORT: DEFAULT_HTTP_PORT,
5766
6916
  WEB_PORT: DEFAULT_WEB_PORT,
5767
- MAX_CONCURRENCY: DEFAULT_MAX_CONCURRENCY
6917
+ MAX_CONCURRENCY: DEFAULT_MAX_CONCURRENCY,
6918
+ TELEMETRY: true
5768
6919
  };
6920
+ function parseAuthConfig(options) {
6921
+ const enabled = options.authEnabled ?? (process.env.DOCS_MCP_AUTH_ENABLED?.toLowerCase() === "true" || false);
6922
+ if (!enabled) {
6923
+ return void 0;
6924
+ }
6925
+ const issuerUrl = options.authIssuerUrl ?? process.env.DOCS_MCP_AUTH_ISSUER_URL;
6926
+ const audience = options.authAudience ?? process.env.DOCS_MCP_AUTH_AUDIENCE;
6927
+ return {
6928
+ enabled,
6929
+ issuerUrl,
6930
+ audience,
6931
+ scopes: ["openid", "profile"]
6932
+ // Default scopes for OAuth2/OIDC
6933
+ };
6934
+ }
6935
+ function validateAuthConfig(authConfig) {
6936
+ if (!authConfig.enabled) {
6937
+ return;
6938
+ }
6939
+ const errors = [];
6940
+ if (!authConfig.issuerUrl) {
6941
+ errors.push("--auth-issuer-url is required when auth is enabled");
6942
+ } else {
6943
+ try {
6944
+ const url = new URL(authConfig.issuerUrl);
6945
+ if (url.protocol !== "https:") {
6946
+ errors.push("Issuer URL must use HTTPS protocol");
6947
+ }
6948
+ } catch {
6949
+ errors.push("Issuer URL must be a valid URL");
6950
+ }
6951
+ }
6952
+ if (!authConfig.audience) {
6953
+ errors.push("--auth-audience is required when auth is enabled");
6954
+ } else {
6955
+ try {
6956
+ const url = new URL(authConfig.audience);
6957
+ if (url.protocol === "http:" && url.hostname !== "localhost") {
6958
+ logger.warn(
6959
+ "⚠️ Audience uses HTTP protocol - consider using HTTPS for production"
6960
+ );
6961
+ }
6962
+ if (url.hash) {
6963
+ errors.push("Audience must not contain URL fragments");
6964
+ }
6965
+ } catch {
6966
+ if (authConfig.audience.startsWith("urn:")) {
6967
+ const urnParts = authConfig.audience.split(":");
6968
+ if (urnParts.length < 3 || !urnParts[1] || !urnParts[2]) {
6969
+ errors.push("URN audience must follow format: urn:namespace:specific-string");
6970
+ }
6971
+ } else {
6972
+ errors.push(
6973
+ "Audience must be a valid absolute URL or URN (e.g., https://api.example.com or urn:company:service)"
6974
+ );
6975
+ }
6976
+ }
6977
+ }
6978
+ if (errors.length > 0) {
6979
+ throw new Error(`Auth configuration validation failed:
6980
+ ${errors.join("\n")}`);
6981
+ }
6982
+ }
6983
+ function warnHttpUsage(authConfig, port) {
6984
+ if (!authConfig?.enabled) {
6985
+ return;
6986
+ }
6987
+ const isLocalhost = process.env.NODE_ENV !== "production" || port === 6280 || // default dev port
6988
+ process.env.HOSTNAME?.includes("localhost");
6989
+ if (!isLocalhost) {
6990
+ logger.warn(
6991
+ "⚠️ Authentication is enabled but running over HTTP in production. Consider using HTTPS for security."
6992
+ );
6993
+ }
6994
+ }
5769
6995
  function createDefaultAction(program) {
5770
6996
  return program.addOption(
5771
6997
  new Option("--protocol <protocol>", "Protocol for MCP server").choices(["auto", "stdio", "http"]).default("auto")
@@ -5777,13 +7003,33 @@ function createDefaultAction(program) {
5777
7003
  }
5778
7004
  return String(n);
5779
7005
  }).default(CLI_DEFAULTS.HTTP_PORT.toString())
5780
- ).option("--resume", "Resume interrupted jobs on startup", false).option("--no-resume", "Do not resume jobs on startup").action(
7006
+ ).option("--resume", "Resume interrupted jobs on startup", false).option("--no-resume", "Do not resume jobs on startup").option(
7007
+ "--read-only",
7008
+ "Run in read-only mode (only expose read tools, disable write/job tools)",
7009
+ false
7010
+ ).option(
7011
+ "--auth-enabled",
7012
+ "Enable OAuth2/OIDC authentication for MCP endpoints",
7013
+ false
7014
+ ).option("--auth-issuer-url <url>", "Issuer/discovery URL for OAuth2/OIDC provider").option(
7015
+ "--auth-audience <id>",
7016
+ "JWT audience claim (identifies this protected resource)"
7017
+ ).action(
5781
7018
  async (options, command) => {
5782
7019
  const globalOptions = command.opts();
5783
7020
  const resolvedProtocol = resolveProtocol(options.protocol);
5784
7021
  setupLogging(globalOptions, resolvedProtocol);
5785
7022
  logger.debug("No subcommand specified, starting unified server by default...");
5786
7023
  const port = validatePort(options.port);
7024
+ const authConfig = parseAuthConfig({
7025
+ authEnabled: options.authEnabled,
7026
+ authIssuerUrl: options.authIssuerUrl,
7027
+ authAudience: options.authAudience
7028
+ });
7029
+ if (authConfig) {
7030
+ validateAuthConfig(authConfig);
7031
+ warnHttpUsage(authConfig, port);
7032
+ }
5787
7033
  ensurePlaywrightBrowsersInstalled();
5788
7034
  const docService = await createLocalDocumentManagement();
5789
7035
  const pipelineOptions = {
@@ -5793,14 +7039,14 @@ function createDefaultAction(program) {
5793
7039
  };
5794
7040
  const pipeline = await createPipelineWithCallbacks(docService, pipelineOptions);
5795
7041
  if (resolvedProtocol === "stdio") {
5796
- logger.debug(`🔍 Auto-detected stdio protocol (no TTY)`);
7042
+ logger.debug(`Auto-detected stdio protocol (no TTY)`);
5797
7043
  await pipeline.start();
5798
7044
  const mcpTools = await initializeTools(docService, pipeline);
5799
- await startStdioServer(mcpTools);
7045
+ await startStdioServer(mcpTools, options.readOnly);
5800
7046
  await new Promise(() => {
5801
7047
  });
5802
7048
  } else {
5803
- logger.debug(`🔍 Auto-detected http protocol (TTY available)`);
7049
+ logger.debug(`Auto-detected http protocol (TTY available)`);
5804
7050
  const config = createAppServerConfig({
5805
7051
  enableWebInterface: true,
5806
7052
  // Enable web interface in http mode
@@ -5810,7 +7056,9 @@ function createDefaultAction(program) {
5810
7056
  // Enable API (tRPC) in http mode
5811
7057
  enableWorker: true,
5812
7058
  // Always enable in-process worker for unified server
5813
- port
7059
+ port,
7060
+ readOnly: options.readOnly,
7061
+ auth: authConfig
5814
7062
  });
5815
7063
  await startAppServer(docService, pipeline, config);
5816
7064
  await new Promise(() => {
@@ -5824,12 +7072,24 @@ async function fetchUrlAction(url, options, command) {
5824
7072
  setupLogging(globalOptions);
5825
7073
  const headers = parseHeaders(options.header);
5826
7074
  const fetchUrlTool = new FetchUrlTool(new HttpFetcher(), new FileFetcher());
5827
- const content = await fetchUrlTool.execute({
5828
- url,
5829
- followRedirects: options.followRedirects,
5830
- scrapeMode: options.scrapeMode,
5831
- headers: Object.keys(headers).length > 0 ? headers : void 0
5832
- });
7075
+ const content = await trackTool(
7076
+ "fetch_url",
7077
+ () => fetchUrlTool.execute({
7078
+ url,
7079
+ followRedirects: options.followRedirects,
7080
+ scrapeMode: options.scrapeMode,
7081
+ headers: Object.keys(headers).length > 0 ? headers : void 0
7082
+ }),
7083
+ (content2) => ({
7084
+ url_protocol: extractProtocol(url),
7085
+ // Safe: only protocol, not full URL
7086
+ follow_redirects: options.followRedirects,
7087
+ scrape_mode: options.scrapeMode,
7088
+ has_custom_headers: Object.keys(headers).length > 0,
7089
+ content_length: content2.length,
7090
+ cli_flags: extractCliFlags(process.argv)
7091
+ })
7092
+ );
5833
7093
  console.log(content);
5834
7094
  }
5835
7095
  function createFetchUrlCommand(program) {
@@ -5864,10 +7124,22 @@ async function findVersionAction(library, options, command) {
5864
7124
  const docService = await createDocumentManagement({ serverUrl });
5865
7125
  try {
5866
7126
  const findVersionTool = new FindVersionTool(docService);
5867
- const versionInfo = await findVersionTool.execute({
5868
- library,
5869
- targetVersion: options.version
5870
- });
7127
+ const versionInfo = await trackTool(
7128
+ "find_version",
7129
+ () => findVersionTool.execute({
7130
+ library,
7131
+ targetVersion: options.version
7132
+ }),
7133
+ (versionInfo2) => ({
7134
+ library,
7135
+ // Safe: library names are public
7136
+ has_target_version: !!options.version,
7137
+ result_type: typeof versionInfo2,
7138
+ // 'string'
7139
+ using_remote_server: !!serverUrl,
7140
+ cli_flags: extractCliFlags(process.argv)
7141
+ })
7142
+ );
5871
7143
  if (!versionInfo) throw new Error("Failed to get version information");
5872
7144
  console.log(versionInfo);
5873
7145
  } finally {
@@ -5887,7 +7159,15 @@ async function listAction(options, command) {
5887
7159
  const docService = await createDocumentManagement({ serverUrl });
5888
7160
  try {
5889
7161
  const listLibrariesTool = new ListLibrariesTool(docService);
5890
- const result = await listLibrariesTool.execute();
7162
+ const result = await trackTool(
7163
+ "list_libraries",
7164
+ () => listLibrariesTool.execute(),
7165
+ (result2) => ({
7166
+ library_count: result2.libraries.length,
7167
+ using_remote_server: !!serverUrl,
7168
+ cli_flags: extractCliFlags(process.argv)
7169
+ })
7170
+ );
5891
7171
  console.log(formatOutput(result.libraries));
5892
7172
  } finally {
5893
7173
  await docService.shutdown();
@@ -5913,6 +7193,17 @@ function createMcpCommand(program) {
5913
7193
  ).option(
5914
7194
  "--server-url <url>",
5915
7195
  "URL of external pipeline worker RPC (e.g., http://localhost:6280/api)"
7196
+ ).option(
7197
+ "--read-only",
7198
+ "Run in read-only mode (only expose read tools, disable write/job tools)",
7199
+ false
7200
+ ).option(
7201
+ "--auth-enabled",
7202
+ "Enable OAuth2/OIDC authentication for MCP endpoints",
7203
+ false
7204
+ ).option("--auth-issuer-url <url>", "Issuer/discovery URL for OAuth2/OIDC provider").option(
7205
+ "--auth-audience <id>",
7206
+ "JWT audience claim (identifies this protected resource)"
5916
7207
  ).action(
5917
7208
  async (cmdOptions, command) => {
5918
7209
  const globalOptions = command.parent?.opts() || {};
@@ -5920,6 +7211,14 @@ function createMcpCommand(program) {
5920
7211
  const serverUrl = cmdOptions.serverUrl;
5921
7212
  const resolvedProtocol = resolveProtocol(cmdOptions.protocol);
5922
7213
  setupLogging(globalOptions, resolvedProtocol);
7214
+ const authConfig = parseAuthConfig({
7215
+ authEnabled: cmdOptions.authEnabled,
7216
+ authIssuerUrl: cmdOptions.authIssuerUrl,
7217
+ authAudience: cmdOptions.authAudience
7218
+ });
7219
+ if (authConfig) {
7220
+ validateAuthConfig(authConfig);
7221
+ }
5923
7222
  try {
5924
7223
  const docService = await createDocumentManagement({
5925
7224
  serverUrl
@@ -5935,15 +7234,15 @@ function createMcpCommand(program) {
5935
7234
  pipelineOptions
5936
7235
  );
5937
7236
  if (resolvedProtocol === "stdio") {
5938
- logger.debug(`🔍 Auto-detected stdio protocol (no TTY)`);
7237
+ logger.debug(`Auto-detected stdio protocol (no TTY)`);
5939
7238
  logger.info("🚀 Starting MCP server (stdio mode)");
5940
7239
  await pipeline.start();
5941
7240
  const mcpTools = await initializeTools(docService, pipeline);
5942
- await startStdioServer(mcpTools);
7241
+ await startStdioServer(mcpTools, cmdOptions.readOnly);
5943
7242
  await new Promise(() => {
5944
7243
  });
5945
7244
  } else {
5946
- logger.debug(`🔍 Auto-detected http protocol (TTY available)`);
7245
+ logger.debug(`Auto-detected http protocol (TTY available)`);
5947
7246
  logger.info("🚀 Starting MCP server (http mode)");
5948
7247
  const config = createAppServerConfig({
5949
7248
  enableWebInterface: false,
@@ -5953,7 +7252,9 @@ function createMcpCommand(program) {
5953
7252
  // Never enable API in mcp command
5954
7253
  enableWorker: !serverUrl,
5955
7254
  port,
5956
- externalWorkerUrl: serverUrl
7255
+ externalWorkerUrl: serverUrl,
7256
+ readOnly: cmdOptions.readOnly,
7257
+ auth: authConfig
5957
7258
  });
5958
7259
  await startAppServer(docService, pipeline, config);
5959
7260
  await new Promise(() => {
@@ -5973,13 +7274,21 @@ async function removeAction(library, options, command) {
5973
7274
  const docService = await createDocumentManagement({ serverUrl });
5974
7275
  const { version: version2 } = options;
5975
7276
  try {
5976
- await docService.removeAllDocuments(library, version2);
5977
- console.log(
5978
- `✅ Successfully removed documents for ${library}${version2 ? `@${version2}` : " (unversioned)"}.`
7277
+ await trackTool(
7278
+ "remove_documents",
7279
+ () => docService.removeAllDocuments(library, version2),
7280
+ () => ({
7281
+ library,
7282
+ // Safe: library names are public
7283
+ has_version: !!version2,
7284
+ using_remote_server: !!serverUrl,
7285
+ cli_flags: extractCliFlags(process.argv)
7286
+ })
5979
7287
  );
7288
+ console.log(`✅ Successfully removed ${library}${version2 ? `@${version2}` : ""}.`);
5980
7289
  } catch (error) {
5981
7290
  console.error(
5982
- `❌ Failed to remove documents for ${library}${version2 ? `@${version2}` : " (unversioned)"}:`,
7291
+ `❌ Failed to remove ${library}${version2 ? `@${version2}` : ""}:`,
5983
7292
  error instanceof Error ? error.message : String(error)
5984
7293
  );
5985
7294
  throw error;
@@ -6015,23 +7324,48 @@ async function scrapeAction(library, url, options, command) {
6015
7324
  await pipeline.start();
6016
7325
  const scrapeTool = new ScrapeTool(pipeline);
6017
7326
  const headers = parseHeaders(options.header);
6018
- const result = await scrapeTool.execute({
6019
- url,
6020
- library,
6021
- version: options.version,
6022
- options: {
6023
- maxPages: Number.parseInt(options.maxPages),
6024
- maxDepth: Number.parseInt(options.maxDepth),
6025
- maxConcurrency: Number.parseInt(options.maxConcurrency),
6026
- ignoreErrors: options.ignoreErrors,
7327
+ const result = await trackTool(
7328
+ "scrape_docs",
7329
+ () => scrapeTool.execute({
7330
+ url,
7331
+ library,
7332
+ version: options.version,
7333
+ options: {
7334
+ maxPages: Number.parseInt(options.maxPages, 10),
7335
+ maxDepth: Number.parseInt(options.maxDepth, 10),
7336
+ maxConcurrency: Number.parseInt(options.maxConcurrency, 10),
7337
+ ignoreErrors: options.ignoreErrors,
7338
+ scope: options.scope,
7339
+ followRedirects: options.followRedirects,
7340
+ scrapeMode: options.scrapeMode,
7341
+ includePatterns: Array.isArray(options.includePattern) && options.includePattern.length > 0 ? options.includePattern : void 0,
7342
+ excludePatterns: Array.isArray(options.excludePattern) && options.excludePattern.length > 0 ? options.excludePattern : void 0,
7343
+ headers: Object.keys(headers).length > 0 ? headers : void 0
7344
+ }
7345
+ }),
7346
+ (result2) => ({
7347
+ library,
7348
+ // Safe: library names are public
7349
+ url_protocol: extractProtocol(url),
7350
+ // Safe: only protocol, not full URL
7351
+ max_pages: Number.parseInt(options.maxPages, 10),
7352
+ max_depth: Number.parseInt(options.maxDepth, 10),
7353
+ max_concurrency: Number.parseInt(options.maxConcurrency, 10),
7354
+ has_version: !!options.version,
6027
7355
  scope: options.scope,
6028
- followRedirects: options.followRedirects,
6029
- scrapeMode: options.scrapeMode,
6030
- includePatterns: Array.isArray(options.includePattern) && options.includePattern.length > 0 ? options.includePattern : void 0,
6031
- excludePatterns: Array.isArray(options.excludePattern) && options.excludePattern.length > 0 ? options.excludePattern : void 0,
6032
- headers: Object.keys(headers).length > 0 ? headers : void 0
6033
- }
6034
- });
7356
+ scrape_mode: options.scrapeMode,
7357
+ ignore_errors: options.ignoreErrors,
7358
+ follow_redirects: options.followRedirects,
7359
+ has_include_patterns: Array.isArray(options.includePattern) && options.includePattern.length > 0,
7360
+ has_exclude_patterns: Array.isArray(options.excludePattern) && options.excludePattern.length > 0,
7361
+ has_custom_headers: Object.keys(headers).length > 0,
7362
+ using_remote_server: !!serverUrl,
7363
+ cli_flags: extractCliFlags(process.argv),
7364
+ is_async_job: !("pagesScraped" in result2),
7365
+ // Pipeline mode vs direct mode
7366
+ pages_scraped: "pagesScraped" in result2 ? result2.pagesScraped : void 0
7367
+ })
7368
+ );
6035
7369
  if ("pagesScraped" in result) {
6036
7370
  console.log(`✅ Successfully scraped ${result.pagesScraped} pages`);
6037
7371
  } else {
@@ -6113,13 +7447,28 @@ async function searchAction(library, query, options, command) {
6113
7447
  const docService = await createDocumentManagement({ serverUrl });
6114
7448
  try {
6115
7449
  const searchTool = new SearchTool(docService);
6116
- const result = await searchTool.execute({
6117
- library,
6118
- version: options.version,
6119
- query,
6120
- limit: Number.parseInt(options.limit),
6121
- exactMatch: options.exactMatch
6122
- });
7450
+ const result = await trackTool(
7451
+ "search_docs",
7452
+ () => searchTool.execute({
7453
+ library,
7454
+ version: options.version,
7455
+ query,
7456
+ limit: Number.parseInt(options.limit, 10),
7457
+ exactMatch: options.exactMatch
7458
+ }),
7459
+ (result2) => ({
7460
+ library,
7461
+ // Safe: library names are public
7462
+ query_analysis: analyzeSearchQuery(query),
7463
+ // Analyzed, not raw query
7464
+ result_count: result2.results.length,
7465
+ limit_used: Number.parseInt(options.limit, 10),
7466
+ has_version_filter: !!options.version,
7467
+ exact_match: options.exactMatch,
7468
+ using_remote_server: !!serverUrl,
7469
+ cli_flags: extractCliFlags(process.argv)
7470
+ })
7471
+ );
6123
7472
  console.log(formatOutput(result.results));
6124
7473
  } finally {
6125
7474
  await docService.shutdown();
@@ -6232,11 +7581,28 @@ function createCliProgram() {
6232
7581
  const program = new Command();
6233
7582
  program.name("docs-mcp-server").description("Unified CLI, MCP Server, and Web Interface for Docs MCP Server.").version(packageJson.version).addOption(
6234
7583
  new Option("--verbose", "Enable verbose (debug) logging").conflicts("silent")
6235
- ).addOption(new Option("--silent", "Disable all logging except errors")).enablePositionalOptions().allowExcessArguments(false).showHelpAfterError(true);
6236
- program.hook("preAction", (thisCommand, _actionCommand) => {
7584
+ ).addOption(new Option("--silent", "Disable all logging except errors")).addOption(new Option("--no-telemetry", "Disable telemetry collection")).enablePositionalOptions().allowExcessArguments(false).showHelpAfterError(true);
7585
+ program.hook("preAction", (thisCommand, actionCommand) => {
6237
7586
  const globalOptions = thisCommand.opts();
6238
7587
  if (globalOptions.silent) setLogLevel(LogLevel.ERROR);
6239
7588
  else if (globalOptions.verbose) setLogLevel(LogLevel.DEBUG);
7589
+ if (shouldEnableTelemetry()) {
7590
+ const commandName = actionCommand.name();
7591
+ const session = createCliSession(commandName, {
7592
+ authEnabled: false,
7593
+ // CLI doesn't use auth
7594
+ readOnly: false
7595
+ });
7596
+ analytics.startSession(session);
7597
+ } else {
7598
+ TelemetryConfig.getInstance().disable();
7599
+ }
7600
+ });
7601
+ program.hook("postAction", async () => {
7602
+ if (analytics.isEnabled()) {
7603
+ analytics.endSession();
7604
+ await analytics.shutdown();
7605
+ }
6240
7606
  });
6241
7607
  createMcpCommand(program);
6242
7608
  createWebCommand(program);
@@ -6284,6 +7650,10 @@ const sigintHandler = async () => {
6284
7650
  activeDocService = null;
6285
7651
  logger.debug("SIGINT: DocumentManagementService shut down.");
6286
7652
  }
7653
+ if (analytics.isEnabled()) {
7654
+ await analytics.shutdown();
7655
+ logger.debug("SIGINT: Analytics shut down.");
7656
+ }
6287
7657
  logger.info("✅ Graceful shutdown completed");
6288
7658
  process.exit(0);
6289
7659
  } catch (error) {
@@ -6359,6 +7729,7 @@ export {
6359
7729
  EMBEDDING_BATCH_CHARS as E,
6360
7730
  LibraryNotFoundError as L,
6361
7731
  StoreError as S,
7732
+ TelemetryEvent as T,
6362
7733
  VECTOR_DIMENSION as V,
6363
7734
  applyMigrations as a,
6364
7735
  EMBEDDING_BATCH_SIZE as b,
@@ -6368,7 +7739,9 @@ export {
6368
7739
  SPLITTER_MAX_CHUNK_SIZE as f,
6369
7740
  getProjectRoot as g,
6370
7741
  VersionNotFoundError as h,
6371
- SPLITTER_MIN_CHUNK_SIZE as i,
7742
+ analytics as i,
7743
+ extractHostname as j,
7744
+ SPLITTER_MIN_CHUNK_SIZE as k,
6372
7745
  logger as l,
6373
7746
  mapDbDocumentToDocument as m,
6374
7747
  normalizeVersionName as n