@arabold/docs-mcp-server 1.25.0 → 1.25.2

This diff shows the published contents of the two package versions as released to their public registry. It is provided for informational purposes only and reflects the changes between the versions exactly as they appear in that registry.
package/dist/index.js CHANGED
@@ -541,32 +541,15 @@ class PostHogClient {
541
541
  }
542
542
  class TelemetryConfig {
543
543
  static instance;
544
- enabled;
544
+ enabled = true;
545
+ // Default to enabled
545
546
  constructor() {
546
- this.enabled = this.determineEnabledState();
547
- }
548
- /**
549
- * Determines if telemetry should be enabled based on CLI flags and environment variables.
550
- * Priority: CLI flags > environment variables > default (true)
551
- */
552
- determineEnabledState() {
553
- if (process.env.DOCS_MCP_TELEMETRY === "false") {
554
- return false;
555
- }
556
- const args = process.argv;
557
- if (args.includes("--no-telemetry")) {
558
- return false;
559
- }
560
- return true;
561
547
  }
562
548
  isEnabled() {
563
549
  return this.enabled;
564
550
  }
565
- disable() {
566
- this.enabled = false;
567
- }
568
- enable() {
569
- this.enabled = true;
551
+ setEnabled(enabled) {
552
+ this.enabled = enabled;
570
553
  }
571
554
  static getInstance() {
572
555
  if (!TelemetryConfig.instance) {
@@ -575,10 +558,9 @@ class TelemetryConfig {
575
558
  return TelemetryConfig.instance;
576
559
  }
577
560
  }
578
- function generateInstallationId() {
561
+ function generateInstallationId(storePath) {
579
562
  try {
580
- const envStorePath = process.env.DOCS_MCP_STORE_PATH;
581
- const dataDir = envStorePath || envPaths("docs-mcp-server", { suffix: "" }).data;
563
+ const dataDir = storePath || envPaths("docs-mcp-server", { suffix: "" }).data;
582
564
  const installationIdPath = path.join(dataDir, "installation.id");
583
565
  if (fs.existsSync(installationIdPath)) {
584
566
  const existingId = fs.readFileSync(installationIdPath, "utf8").trim();
@@ -602,10 +584,10 @@ var TelemetryEvent = /* @__PURE__ */ ((TelemetryEvent2) => {
602
584
  TelemetryEvent2["APP_SHUTDOWN"] = "app_shutdown";
603
585
  TelemetryEvent2["CLI_COMMAND"] = "cli_command";
604
586
  TelemetryEvent2["TOOL_USED"] = "tool_used";
605
- TelemetryEvent2["HTTP_REQUEST_COMPLETED"] = "http_request_completed";
606
- TelemetryEvent2["PIPELINE_JOB_PROGRESS"] = "pipeline_job_progress";
607
587
  TelemetryEvent2["PIPELINE_JOB_COMPLETED"] = "pipeline_job_completed";
608
588
  TelemetryEvent2["DOCUMENT_PROCESSED"] = "document_processed";
589
+ TelemetryEvent2["WEB_SEARCH_PERFORMED"] = "web_search_performed";
590
+ TelemetryEvent2["WEB_SCRAPE_STARTED"] = "web_scrape_started";
609
591
  return TelemetryEvent2;
610
592
  })(TelemetryEvent || {});
611
593
  class Analytics {
@@ -623,6 +605,8 @@ class Analytics {
623
605
  const analytics2 = new Analytics(shouldEnable);
624
606
  if (analytics2.isEnabled()) {
625
607
  logger.debug("Analytics enabled");
608
+ } else if (!config.isEnabled()) {
609
+ logger.debug("Analytics disabled (user preference)");
626
610
  } else {
627
611
  logger.debug("Analytics disabled");
628
612
  }
@@ -682,38 +666,29 @@ class Analytics {
682
666
  isEnabled() {
683
667
  return this.enabled;
684
668
  }
685
- /**
686
- * Track tool usage with error handling and automatic timing
687
- */
688
- async trackTool(toolName, operation, getProperties) {
689
- const startTime = Date.now();
690
- try {
691
- const result = await operation();
692
- this.track("tool_used", {
693
- tool: toolName,
694
- success: true,
695
- durationMs: Date.now() - startTime,
696
- ...getProperties ? getProperties(result) : {}
697
- });
698
- return result;
699
- } catch (error) {
700
- this.track("tool_used", {
701
- tool: toolName,
702
- success: false,
703
- durationMs: Date.now() - startTime
704
- });
705
- if (error instanceof Error) {
706
- this.captureException(error, {
707
- tool: toolName,
708
- context: "tool_execution",
709
- durationMs: Date.now() - startTime
710
- });
711
- }
712
- throw error;
713
- }
669
+ }
670
+ let analyticsInstance = null;
671
+ function getAnalytics() {
672
+ if (!analyticsInstance) {
673
+ analyticsInstance = Analytics.create();
714
674
  }
675
+ return analyticsInstance;
715
676
  }
716
- const analytics = Analytics.create();
677
+ function initTelemetry(options) {
678
+ TelemetryConfig.getInstance().setEnabled(options.enabled);
679
+ generateInstallationId(options.storePath);
680
+ analyticsInstance = Analytics.create();
681
+ }
682
+ const analytics = new Proxy({}, {
683
+ get(target, prop) {
684
+ if (!target.isEnabled) {
685
+ const instance = getAnalytics();
686
+ Object.setPrototypeOf(target, Object.getPrototypeOf(instance));
687
+ Object.assign(target, instance);
688
+ }
689
+ return target[prop];
690
+ }
691
+ });
717
692
  function extractHostname(url) {
718
693
  try {
719
694
  const parsed = new URL(url);
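
The telemetry bootstrap changed shape here: TelemetryConfig no longer reads DOCS_MCP_TELEMETRY or --no-telemetry itself, and the exported `analytics` object is now a Proxy that copies the real Analytics instance into itself on first property access, so modules can import it before telemetry is configured. Callers are expected to resolve the enabled flag and store path and pass them to `initTelemetry`. A minimal sketch of that wiring, assuming the flag and env var are still resolved the way the removed `determineEnabledState` did (the actual caller is not part of this hunk):

```js
// Sketch only: how an entry point might feed initTelemetry (flag/env resolution is an assumption).
const telemetryEnabled =
  process.env.DOCS_MCP_TELEMETRY !== "false" && !process.argv.includes("--no-telemetry");

initTelemetry({
  enabled: telemetryEnabled,                    // forwarded to TelemetryConfig.setEnabled()
  storePath: process.env.DOCS_MCP_STORE_PATH,   // forwarded to generateInstallationId()
});

// The exported `analytics` Proxy materializes an Analytics instance on first property access.
analytics.track(TelemetryEvent.CLI_COMMAND, { command: "default" });
```
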
@@ -734,7 +709,7 @@ function extractProtocol(urlOrPath) {
734
709
  }
735
710
  }
736
711
  const name = "@arabold/docs-mcp-server";
737
- const version = "1.24.0";
712
+ const version = "1.25.1";
738
713
  const description = "MCP server for fetching and searching documentation";
739
714
  const type = "module";
740
715
  const bin = { "docs-mcp-server": "dist/index.js" };
@@ -4839,6 +4814,98 @@ class MarkdownMetadataExtractorMiddleware {
4839
4814
  await next();
4840
4815
  }
4841
4816
  }
4817
+ class HtmlNormalizationMiddleware {
4818
+ async process(context, next) {
4819
+ if (!context.dom) {
4820
+ logger.debug(
4821
+ `Skipping HTML normalization for ${context.source} - no DOM available`
4822
+ );
4823
+ await next();
4824
+ return;
4825
+ }
4826
+ try {
4827
+ logger.debug(`Normalizing HTML URLs and links for ${context.source}`);
4828
+ const $ = context.dom;
4829
+ const baseUrl = context.source;
4830
+ this.normalizeImageUrls($, baseUrl);
4831
+ this.normalizeLinks($, baseUrl);
4832
+ logger.debug(`Successfully normalized HTML content for ${context.source}`);
4833
+ } catch (error) {
4834
+ logger.error(`❌ Failed to normalize HTML for ${context.source}: ${error}`);
4835
+ context.errors.push(
4836
+ error instanceof Error ? error : new Error(`HTML normalization failed: ${String(error)}`)
4837
+ );
4838
+ }
4839
+ await next();
4840
+ }
4841
+ /**
4842
+ * Normalizes image URLs by converting relative URLs to absolute URLs.
4843
+ */
4844
+ normalizeImageUrls($, baseUrl) {
4845
+ $("img").each((_index, element) => {
4846
+ const $img = $(element);
4847
+ const src = $img.attr("src");
4848
+ if (!src) return;
4849
+ try {
4850
+ new URL(src);
4851
+ } catch {
4852
+ try {
4853
+ const absoluteUrl = new URL(src, baseUrl).href;
4854
+ $img.attr("src", absoluteUrl);
4855
+ logger.debug(`Converted relative image URL: ${src} → ${absoluteUrl}`);
4856
+ } catch (error) {
4857
+ logger.debug(`Failed to resolve relative image URL: ${src} - ${error}`);
4858
+ }
4859
+ }
4860
+ });
4861
+ }
4862
+ /**
4863
+ * Normalizes links by:
4864
+ * - Converting relative URLs to absolute URLs
4865
+ * - Unwrapping anchor links (preserving text content)
4866
+ * - Unwrapping non-HTTP links (preserving text content)
4867
+ */
4868
+ normalizeLinks($, baseUrl) {
4869
+ $("a").each((_index, element) => {
4870
+ const $link = $(element);
4871
+ const href = $link.attr("href");
4872
+ if (!href) {
4873
+ this.unwrapElement($, $link);
4874
+ return;
4875
+ }
4876
+ if (href.startsWith("#")) {
4877
+ logger.debug(`Removing anchor link: ${href}`);
4878
+ this.unwrapElement($, $link);
4879
+ return;
4880
+ }
4881
+ try {
4882
+ const url = new URL(href);
4883
+ if (url.protocol !== "http:" && url.protocol !== "https:") {
4884
+ logger.debug(`Removing non-HTTP link: ${href}`);
4885
+ this.unwrapElement($, $link);
4886
+ return;
4887
+ }
4888
+ } catch {
4889
+ try {
4890
+ const absoluteUrl = new URL(href, baseUrl).href;
4891
+ $link.attr("href", absoluteUrl);
4892
+ logger.debug(`Converted relative link URL: ${href} → ${absoluteUrl}`);
4893
+ } catch (error) {
4894
+ logger.debug(`Failed to resolve relative link URL: ${href} - ${error}`);
4895
+ this.unwrapElement($, $link);
4896
+ }
4897
+ }
4898
+ });
4899
+ }
4900
+ /**
4901
+ * Unwraps an element by replacing it with its HTML content.
4902
+ * This preserves the inner HTML (including nested elements) while removing the wrapping tag.
4903
+ */
4904
+ unwrapElement(_$, $element) {
4905
+ const htmlContent = $element.html() || $element.text();
4906
+ $element.replaceWith(htmlContent);
4907
+ }
4908
+ }
4842
4909
  function detectCharsetFromHtml(htmlContent) {
4843
4910
  const charsetMatch = htmlContent.match(
4844
4911
  /<meta\s+charset\s*=\s*["']?([^"'>\s]+)["']?[^>]*>/i
@@ -4962,6 +5029,7 @@ class HtmlPipeline extends BasePipeline {
4962
5029
  new HtmlMetadataExtractorMiddleware(),
4963
5030
  new HtmlLinkExtractorMiddleware(),
4964
5031
  new HtmlSanitizerMiddleware(),
5032
+ new HtmlNormalizationMiddleware(),
4965
5033
  new HtmlToMarkdownMiddleware()
4966
5034
  ];
4967
5035
  const semanticSplitter = new SemanticMarkdownSplitter(
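
The new HtmlNormalizationMiddleware is registered between the sanitizer and the Markdown conversion step. It rewrites relative `img` and `a` URLs against the page URL, unwraps anchor-only links (`#...`), and unwraps non-HTTP links while keeping their text. A standalone sketch of the same transformations, assuming `context.dom` is a cheerio document (which the `$(...)` usage above suggests) — this illustrates the effect, not the pipeline's actual invocation:

```js
import * as cheerio from "cheerio";

const baseUrl = "https://example.com/docs/guide/";
const $ = cheerio.load(
  '<p><a href="#intro">Intro</a> <a href="../api">API</a> <img src="img/a.png"></p>'
);

// Relative image URLs become absolute.
$("img").each((_, el) => {
  const src = $(el).attr("src");
  if (!src) return;
  try { new URL(src); } catch { $(el).attr("src", new URL(src, baseUrl).href); }
});

// Anchor-only links are unwrapped; relative links are resolved against the page URL.
$("a").each((_, el) => {
  const $a = $(el);
  const href = $a.attr("href");
  if (!href || href.startsWith("#")) {
    $a.replaceWith($a.html() ?? $a.text());
    return;
  }
  try { new URL(href); } catch { $a.attr("href", new URL(href, baseUrl).href); }
});

console.log($("body").html());
// → <p>Intro <a href="https://example.com/docs/api">API</a> <img src="https://example.com/docs/guide/img/a.png"></p>
```

The Markdown produced afterwards then carries working absolute links and no in-page anchors that would be meaningless outside the original site.
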
@@ -7118,7 +7186,7 @@ class EmbeddingConfig {
7118
7186
  }
7119
7187
  }
7120
7188
  /**
7121
- * Parse embedding model configuration from environment variables.
7189
+ * Parse embedding model configuration from a provided model specification.
7122
7190
  * This is a synchronous operation that extracts provider, model, and known dimensions.
7123
7191
  *
7124
7192
  * Supports various providers:
@@ -7129,11 +7197,11 @@ class EmbeddingConfig {
7129
7197
  * - microsoft: Azure OpenAI
7130
7198
  * - sagemaker: AWS SageMaker hosted models
7131
7199
  *
7132
- * @param modelSpec Optional model specification, defaults to DOCS_MCP_EMBEDDING_MODEL env var
7200
+ * @param modelSpec Model specification (e.g., "openai:text-embedding-3-small"), defaults to "text-embedding-3-small"
7133
7201
  * @returns Parsed embedding model configuration
7134
7202
  */
7135
7203
  parse(modelSpec) {
7136
- const spec = modelSpec || process.env.DOCS_MCP_EMBEDDING_MODEL || "text-embedding-3-small";
7204
+ const spec = modelSpec || "text-embedding-3-small";
7137
7205
  const colonIndex = spec.indexOf(":");
7138
7206
  let provider;
7139
7207
  let model;
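
`parse()` now works purely on the string it is given: the spec defaults to "text-embedding-3-small" and is split at the first colon into provider and model. A small sketch of that split; treating bare names as OpenAI models is an inference from the default spec, not something this hunk states explicitly:

```js
// Mirrors the provider/model split in EmbeddingConfig.parse (sketch only).
function splitModelSpec(spec = "text-embedding-3-small") {
  const colonIndex = spec.indexOf(":");
  if (colonIndex === -1) {
    return { provider: "openai", model: spec };   // bare name → assumed OpenAI model
  }
  return {
    provider: spec.slice(0, colonIndex),
    model: spec.slice(colonIndex + 1),            // the model part may itself contain colons
  };
}

splitModelSpec("google:text-embedding-004"); // { provider: "google", model: "text-embedding-004" }
splitModelSpec("text-embedding-3-small");    // { provider: "openai", model: "text-embedding-3-small" }
```
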
@@ -7331,16 +7399,13 @@ const CLI_DEFAULTS = {
7331
7399
  TELEMETRY: true
7332
7400
  };
7333
7401
  function parseAuthConfig(options) {
7334
- const enabled = options.authEnabled ?? (process.env.DOCS_MCP_AUTH_ENABLED?.toLowerCase() === "true" || false);
7335
- if (!enabled) {
7402
+ if (!options.authEnabled) {
7336
7403
  return void 0;
7337
7404
  }
7338
- const issuerUrl = options.authIssuerUrl ?? process.env.DOCS_MCP_AUTH_ISSUER_URL;
7339
- const audience = options.authAudience ?? process.env.DOCS_MCP_AUTH_AUDIENCE;
7340
7405
  return {
7341
- enabled,
7342
- issuerUrl,
7343
- audience,
7406
+ enabled: true,
7407
+ issuerUrl: options.authIssuerUrl,
7408
+ audience: options.authAudience,
7344
7409
  scopes: ["openid", "profile"]
7345
7410
  // Default scopes for OAuth2/OIDC
7346
7411
  };
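
parseAuthConfig no longer consults the DOCS_MCP_AUTH_* variables directly; those are bound to the CLI flags via Option.env() later in this diff, so the function only sees already-parsed options. Illustrative values, matching the return shape above:

```js
// With --auth-enabled (or DOCS_MCP_AUTH_ENABLED through the Option.env() bindings added below):
parseAuthConfig({
  authEnabled: true,
  authIssuerUrl: "https://issuer.example.com",   // illustrative value
  authAudience: "docs-mcp-server",               // illustrative value
});
// → { enabled: true, issuerUrl: "https://issuer.example.com", audience: "docs-mcp-server", scopes: ["openid", "profile"] }

parseAuthConfig({ authEnabled: false }); // → undefined
```
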
@@ -7405,12 +7470,23 @@ function warnHttpUsage(authConfig, port) {
7405
7470
  );
7406
7471
  }
7407
7472
  }
7408
- function resolveEmbeddingContext(cliArgs) {
7473
+ function resolveEmbeddingContext(embeddingModel) {
7409
7474
  try {
7410
- const modelSpec = cliArgs?.embeddingModel || process.env.DOCS_MCP_EMBEDDING_MODEL;
7411
- logger.debug("Resolving embedding configuration");
7412
- const config = EmbeddingConfig.parseEmbeddingConfig(modelSpec);
7413
- return config;
7475
+ let modelSpec = embeddingModel;
7476
+ if (!modelSpec && process.env.OPENAI_API_KEY) {
7477
+ modelSpec = "text-embedding-3-small";
7478
+ logger.debug(
7479
+ "Using default OpenAI embedding model due to OPENAI_API_KEY presence."
7480
+ );
7481
+ }
7482
+ if (!modelSpec) {
7483
+ logger.debug(
7484
+ "No embedding model specified and OPENAI_API_KEY not found. Embeddings are disabled."
7485
+ );
7486
+ return null;
7487
+ }
7488
+ logger.debug(`Resolving embedding configuration for model: ${modelSpec}`);
7489
+ return EmbeddingConfig.parseEmbeddingConfig(modelSpec);
7414
7490
  } catch (error) {
7415
7491
  logger.debug(`Failed to resolve embedding configuration: ${error}`);
7416
7492
  return null;
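
Embedding resolution is now explicit and opt-in. The precedence shown above, condensed into a sketch (hypothetical helper name):

```js
// Precedence sketch: explicit spec > OPENAI_API_KEY default > disabled.
function pickEmbeddingSpec(embeddingModel) {
  if (embeddingModel) return embeddingModel;                     // e.g. from --embedding-model
  if (process.env.OPENAI_API_KEY) return "text-embedding-3-small";
  return null;                                                   // vector search disabled
}
```

When this resolves to null, the document store later skips embedding initialization and runs in FTS-only mode (see the DocumentStore hunk further down).
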
@@ -7431,55 +7507,42 @@ class CancelJobTool {
7431
7507
  * @returns A promise that resolves with the outcome message.
7432
7508
  */
7433
7509
  async execute(input) {
7434
- return analytics.trackTool(
7435
- "cancel_job",
7436
- async () => {
7437
- try {
7438
- const job = await this.pipeline.getJob(input.jobId);
7439
- if (!job) {
7440
- logger.warn(`❓ [CancelJobTool] Job not found: ${input.jobId}`);
7441
- return {
7442
- message: `Job with ID ${input.jobId} not found.`,
7443
- success: false
7444
- };
7445
- }
7446
- if (job.status === PipelineJobStatus.COMPLETED || // Use enum member
7447
- job.status === PipelineJobStatus.FAILED || // Use enum member
7448
- job.status === PipelineJobStatus.CANCELLED) {
7449
- logger.debug(
7450
- `Job ${input.jobId} is already in a final state: ${job.status}.`
7451
- );
7452
- return {
7453
- message: `Job ${input.jobId} is already ${job.status}. No action taken.`,
7454
- success: true
7455
- // Considered success as no cancellation needed
7456
- };
7457
- }
7458
- await this.pipeline.cancelJob(input.jobId);
7459
- const updatedJob = await this.pipeline.getJob(input.jobId);
7460
- const finalStatus = updatedJob?.status ?? "UNKNOWN (job disappeared?)";
7461
- logger.debug(
7462
- `Cancellation requested for job ${input.jobId}. Current status: ${finalStatus}`
7463
- );
7464
- return {
7465
- message: `Cancellation requested for job ${input.jobId}. Current status: ${finalStatus}.`,
7466
- success: true
7467
- };
7468
- } catch (error) {
7469
- logger.error(`❌ Error cancelling job ${input.jobId}: ${error}`);
7470
- return {
7471
- message: `Failed to cancel job ${input.jobId}: ${error instanceof Error ? error.message : String(error)}`,
7472
- success: false
7473
- };
7474
- }
7475
- },
7476
- (result) => {
7510
+ try {
7511
+ const job = await this.pipeline.getJob(input.jobId);
7512
+ if (!job) {
7513
+ logger.warn(`❓ [CancelJobTool] Job not found: ${input.jobId}`);
7477
7514
  return {
7478
- success: result.success
7479
- // Note: success flag already indicates if cancellation was successful
7515
+ message: `Job with ID ${input.jobId} not found.`,
7516
+ success: false
7480
7517
  };
7481
7518
  }
7482
- );
7519
+ if (job.status === PipelineJobStatus.COMPLETED || // Use enum member
7520
+ job.status === PipelineJobStatus.FAILED || // Use enum member
7521
+ job.status === PipelineJobStatus.CANCELLED) {
7522
+ logger.debug(`Job ${input.jobId} is already in a final state: ${job.status}.`);
7523
+ return {
7524
+ message: `Job ${input.jobId} is already ${job.status}. No action taken.`,
7525
+ success: true
7526
+ // Considered success as no cancellation needed
7527
+ };
7528
+ }
7529
+ await this.pipeline.cancelJob(input.jobId);
7530
+ const updatedJob = await this.pipeline.getJob(input.jobId);
7531
+ const finalStatus = updatedJob?.status ?? "UNKNOWN (job disappeared?)";
7532
+ logger.debug(
7533
+ `Cancellation requested for job ${input.jobId}. Current status: ${finalStatus}`
7534
+ );
7535
+ return {
7536
+ message: `Cancellation requested for job ${input.jobId}. Current status: ${finalStatus}.`,
7537
+ success: true
7538
+ };
7539
+ } catch (error) {
7540
+ logger.error(`❌ Error cancelling job ${input.jobId}: ${error}`);
7541
+ return {
7542
+ message: `Failed to cancel job ${input.jobId}: ${error instanceof Error ? error.message : String(error)}`,
7543
+ success: false
7544
+ };
7545
+ }
7483
7546
  }
7484
7547
  }
7485
7548
  class ClearCompletedJobsTool {
@@ -7497,33 +7560,24 @@ class ClearCompletedJobsTool {
7497
7560
  * @returns A promise that resolves with the outcome of the clear operation.
7498
7561
  */
7499
7562
  async execute(_input) {
7500
- return analytics.trackTool(
7501
- "clear_completed_jobs",
7502
- async () => {
7503
- try {
7504
- const clearedCount = await this.pipeline.clearCompletedJobs();
7505
- const message = clearedCount > 0 ? `Successfully cleared ${clearedCount} completed job${clearedCount === 1 ? "" : "s"} from the queue.` : "No completed jobs to clear.";
7506
- logger.debug(message);
7507
- return {
7508
- message,
7509
- success: true,
7510
- clearedCount
7511
- };
7512
- } catch (error) {
7513
- const errorMessage = `Failed to clear completed jobs: ${error instanceof Error ? error.message : String(error)}`;
7514
- logger.error(`❌ ${errorMessage}`);
7515
- return {
7516
- message: errorMessage,
7517
- success: false,
7518
- clearedCount: 0
7519
- };
7520
- }
7521
- },
7522
- (result) => ({
7523
- success: result.success,
7524
- clearedCount: result.clearedCount
7525
- })
7526
- );
7563
+ try {
7564
+ const clearedCount = await this.pipeline.clearCompletedJobs();
7565
+ const message = clearedCount > 0 ? `Successfully cleared ${clearedCount} completed job${clearedCount === 1 ? "" : "s"} from the queue.` : "No completed jobs to clear.";
7566
+ logger.debug(message);
7567
+ return {
7568
+ message,
7569
+ success: true,
7570
+ clearedCount
7571
+ };
7572
+ } catch (error) {
7573
+ const errorMessage = `Failed to clear completed jobs: ${error instanceof Error ? error.message : String(error)}`;
7574
+ logger.error(`❌ ${errorMessage}`);
7575
+ return {
7576
+ message: errorMessage,
7577
+ success: false,
7578
+ clearedCount: 0
7579
+ };
7580
+ }
7527
7581
  }
7528
7582
  }
7529
7583
  class ToolError extends Error {
@@ -7583,103 +7637,88 @@ class FetchUrlTool {
7583
7637
  * @throws {ToolError} If fetching or processing fails
7584
7638
  */
7585
7639
  async execute(options) {
7586
- return analytics.trackTool(
7587
- "fetch_url",
7588
- async () => {
7589
- const { url, scrapeMode = ScrapeMode.Auto, headers } = options;
7590
- const canFetchResults = this.fetchers.map((f) => f.canFetch(url));
7591
- const fetcherIndex = canFetchResults.indexOf(true);
7592
- if (fetcherIndex === -1) {
7593
- throw new ToolError(
7594
- `Invalid URL: ${url}. Must be an HTTP/HTTPS URL or a file:// URL.`,
7595
- this.constructor.name
7596
- );
7597
- }
7598
- const fetcher = this.fetchers[fetcherIndex];
7599
- logger.debug(`Using fetcher "${fetcher.constructor.name}" for URL: ${url}`);
7600
- try {
7601
- logger.info(`📡 Fetching ${url}...`);
7602
- const rawContent = await fetcher.fetch(url, {
7603
- followRedirects: options.followRedirects ?? true,
7604
- maxRetries: 3,
7605
- headers
7606
- // propagate custom headers
7607
- });
7608
- logger.info("🔄 Processing content...");
7609
- let processed;
7610
- for (const pipeline of this.pipelines) {
7611
- if (pipeline.canProcess(rawContent)) {
7612
- processed = await pipeline.process(
7613
- rawContent,
7614
- {
7615
- url,
7616
- library: "",
7617
- version: "",
7618
- maxDepth: 0,
7619
- maxPages: 1,
7620
- maxConcurrency: 1,
7621
- scope: "subpages",
7622
- followRedirects: options.followRedirects ?? true,
7623
- excludeSelectors: void 0,
7624
- ignoreErrors: false,
7625
- scrapeMode,
7626
- headers
7627
- // propagate custom headers
7628
- },
7629
- fetcher
7630
- );
7631
- break;
7632
- }
7633
- }
7634
- if (!processed) {
7635
- logger.warn(
7636
- `⚠️ Unsupported content type "${rawContent.mimeType}" for ${url}. Returning raw content.`
7637
- );
7638
- const resolvedCharset = resolveCharset(
7639
- rawContent.charset,
7640
- rawContent.content,
7641
- rawContent.mimeType
7642
- );
7643
- const contentString = convertToString(rawContent.content, resolvedCharset);
7644
- return contentString;
7645
- }
7646
- for (const err of processed.errors) {
7647
- logger.warn(`⚠️ Processing error for ${url}: ${err.message}`);
7648
- }
7649
- if (typeof processed.textContent !== "string" || !processed.textContent.trim()) {
7650
- throw new ToolError(
7651
- `Processing resulted in empty content for ${url}`,
7652
- this.constructor.name
7653
- );
7654
- }
7655
- logger.info(`✅ Successfully processed ${url}`);
7656
- return processed.textContent;
7657
- } catch (error) {
7658
- if (error instanceof ScraperError || error instanceof ToolError) {
7659
- throw new ToolError(
7660
- `Failed to fetch or process URL: ${error.message}`,
7661
- this.constructor.name
7662
- );
7663
- }
7664
- throw new ToolError(
7665
- `Failed to fetch or process URL: ${error instanceof Error ? error.message : String(error)}`,
7666
- this.constructor.name
7640
+ const { url, scrapeMode = ScrapeMode.Auto, headers } = options;
7641
+ const canFetchResults = this.fetchers.map((f) => f.canFetch(url));
7642
+ const fetcherIndex = canFetchResults.indexOf(true);
7643
+ if (fetcherIndex === -1) {
7644
+ throw new ToolError(
7645
+ `Invalid URL: ${url}. Must be an HTTP/HTTPS URL or a file:// URL.`,
7646
+ this.constructor.name
7647
+ );
7648
+ }
7649
+ const fetcher = this.fetchers[fetcherIndex];
7650
+ logger.debug(`Using fetcher "${fetcher.constructor.name}" for URL: ${url}`);
7651
+ try {
7652
+ logger.info(`📡 Fetching ${url}...`);
7653
+ const rawContent = await fetcher.fetch(url, {
7654
+ followRedirects: options.followRedirects ?? true,
7655
+ maxRetries: 3,
7656
+ headers
7657
+ // propagate custom headers
7658
+ });
7659
+ logger.info("🔄 Processing content...");
7660
+ let processed;
7661
+ for (const pipeline of this.pipelines) {
7662
+ if (pipeline.canProcess(rawContent)) {
7663
+ processed = await pipeline.process(
7664
+ rawContent,
7665
+ {
7666
+ url,
7667
+ library: "",
7668
+ version: "",
7669
+ maxDepth: 0,
7670
+ maxPages: 1,
7671
+ maxConcurrency: 1,
7672
+ scope: "subpages",
7673
+ followRedirects: options.followRedirects ?? true,
7674
+ excludeSelectors: void 0,
7675
+ ignoreErrors: false,
7676
+ scrapeMode,
7677
+ headers
7678
+ // propagate custom headers
7679
+ },
7680
+ fetcher
7667
7681
  );
7668
- } finally {
7669
- await Promise.allSettled(this.pipelines.map((pipeline) => pipeline.close()));
7682
+ break;
7670
7683
  }
7671
- },
7672
- (result) => {
7673
- const { url, scrapeMode, followRedirects, headers } = options;
7674
- return {
7675
- url,
7676
- scrapeMode,
7677
- followRedirects,
7678
- contentLength: result.length,
7679
- hasHeaders: !!headers
7680
- };
7681
7684
  }
7682
- );
7685
+ if (!processed) {
7686
+ logger.warn(
7687
+ `⚠️ Unsupported content type "${rawContent.mimeType}" for ${url}. Returning raw content.`
7688
+ );
7689
+ const resolvedCharset = resolveCharset(
7690
+ rawContent.charset,
7691
+ rawContent.content,
7692
+ rawContent.mimeType
7693
+ );
7694
+ const contentString = convertToString(rawContent.content, resolvedCharset);
7695
+ return contentString;
7696
+ }
7697
+ for (const err of processed.errors) {
7698
+ logger.warn(`⚠️ Processing error for ${url}: ${err.message}`);
7699
+ }
7700
+ if (typeof processed.textContent !== "string" || !processed.textContent.trim()) {
7701
+ throw new ToolError(
7702
+ `Processing resulted in empty content for ${url}`,
7703
+ this.constructor.name
7704
+ );
7705
+ }
7706
+ logger.info(`✅ Successfully processed ${url}`);
7707
+ return processed.textContent;
7708
+ } catch (error) {
7709
+ if (error instanceof ScraperError || error instanceof ToolError) {
7710
+ throw new ToolError(
7711
+ `Failed to fetch or process URL: ${error.message}`,
7712
+ this.constructor.name
7713
+ );
7714
+ }
7715
+ throw new ToolError(
7716
+ `Failed to fetch or process URL: ${error instanceof Error ? error.message : String(error)}`,
7717
+ this.constructor.name
7718
+ );
7719
+ } finally {
7720
+ await Promise.allSettled(this.pipelines.map((pipeline) => pipeline.close()));
7721
+ }
7683
7722
  }
7684
7723
  }
7685
7724
  class FindVersionTool {
@@ -7692,50 +7731,36 @@ class FindVersionTool {
7692
7731
  * @returns A descriptive string indicating the best match and unversioned status, or an error message.
7693
7732
  */
7694
7733
  async execute(options) {
7695
- return analytics.trackTool(
7696
- "find_version",
7697
- async () => {
7698
- const { library, targetVersion } = options;
7699
- const libraryAndVersion = `${library}${targetVersion ? `@${targetVersion}` : ""}`;
7700
- try {
7701
- const { bestMatch, hasUnversioned } = await this.docService.findBestVersion(
7702
- library,
7703
- targetVersion
7704
- );
7705
- let message = "";
7706
- if (bestMatch) {
7707
- message = `Best match: ${bestMatch}.`;
7708
- if (hasUnversioned) {
7709
- message += " Unversioned docs also available.";
7710
- }
7711
- } else if (hasUnversioned) {
7712
- message = `No matching version found for ${libraryAndVersion}, but unversioned docs exist.`;
7713
- } else {
7714
- message = `No matching version or unversioned documents found for ${libraryAndVersion}.`;
7715
- }
7716
- return { message, bestMatch, hasUnversioned };
7717
- } catch (error) {
7718
- if (error instanceof VersionNotFoundError) {
7719
- logger.info(`ℹ️ Version not found: ${error.message}`);
7720
- const message = `No matching version or unversioned documents found for ${libraryAndVersion}. Available: ${error.availableVersions.length > 0 ? error.availableVersions.map((v) => v.version).join(", ") : "None"}.`;
7721
- return { message, bestMatch: null, hasUnversioned: false };
7722
- }
7723
- logger.error(
7724
- `❌ Error finding version for ${libraryAndVersion}: ${error instanceof Error ? error.message : error}`
7725
- );
7726
- throw error;
7727
- }
7728
- },
7729
- (result) => {
7730
- const { library, targetVersion } = options;
7731
- return {
7732
- library,
7733
- targetVersion,
7734
- foundMatch: !!result.bestMatch,
7735
- hasUnversioned: result.hasUnversioned
7736
- };
7734
+ const { library, targetVersion } = options;
7735
+ const libraryAndVersion = `${library}${targetVersion ? `@${targetVersion}` : ""}`;
7736
+ try {
7737
+ const { bestMatch, hasUnversioned } = await this.docService.findBestVersion(
7738
+ library,
7739
+ targetVersion
7740
+ );
7741
+ let message = "";
7742
+ if (bestMatch) {
7743
+ message = `Best match: ${bestMatch}.`;
7744
+ if (hasUnversioned) {
7745
+ message += " Unversioned docs also available.";
7746
+ }
7747
+ } else if (hasUnversioned) {
7748
+ message = `No matching version found for ${libraryAndVersion}, but unversioned docs exist.`;
7749
+ } else {
7750
+ message = `No matching version or unversioned documents found for ${libraryAndVersion}.`;
7737
7751
  }
7738
- ).then((result) => result.message);
7752
+ return message;
7753
+ } catch (error) {
7754
+ if (error instanceof VersionNotFoundError) {
7755
+ logger.info(`ℹ️ Version not found: ${error.message}`);
7756
+ const message = `No matching version or unversioned documents found for ${libraryAndVersion}. Available: ${error.availableVersions.length > 0 ? error.availableVersions.map((v) => v.version).join(", ") : "None"}.`;
7757
+ return message;
7758
+ }
7759
+ logger.error(
7760
+ `❌ Error finding version for ${libraryAndVersion}: ${error instanceof Error ? error.message : error}`
7761
+ );
7762
+ throw error;
7763
+ }
7739
7764
  }
7740
7765
  }
7741
7766
  class GetJobInfoTool {
@@ -7753,41 +7778,29 @@ class GetJobInfoTool {
7753
7778
  * @returns A promise that resolves with the simplified job info or null if not found.
7754
7779
  */
7755
7780
  async execute(input) {
7756
- return analytics.trackTool(
7757
- "get_job_info",
7758
- async () => {
7759
- const job = await this.pipeline.getJob(input.jobId);
7760
- if (!job) {
7761
- return { job: null };
7762
- }
7763
- const jobInfo = {
7764
- id: job.id,
7765
- library: job.library,
7766
- version: job.version,
7767
- status: job.status,
7768
- dbStatus: job.versionStatus,
7769
- createdAt: job.createdAt.toISOString(),
7770
- startedAt: job.startedAt?.toISOString() ?? null,
7771
- finishedAt: job.finishedAt?.toISOString() ?? null,
7772
- error: job.error?.message ?? null,
7773
- progress: job.progressMaxPages && job.progressMaxPages > 0 ? {
7774
- pages: job.progressPages || 0,
7775
- totalPages: job.progressMaxPages,
7776
- totalDiscovered: job.progress?.totalDiscovered || job.progressMaxPages
7777
- } : void 0,
7778
- updatedAt: job.updatedAt?.toISOString(),
7779
- errorMessage: job.errorMessage ?? void 0
7780
- };
7781
- return { job: jobInfo };
7782
- },
7783
- (result) => {
7784
- return {
7785
- found: result.job !== null,
7786
- library: result.job?.library,
7787
- version: result.job?.version
7788
- };
7789
- }
7790
- );
7781
+ const job = await this.pipeline.getJob(input.jobId);
7782
+ if (!job) {
7783
+ return { job: null };
7784
+ }
7785
+ const jobInfo = {
7786
+ id: job.id,
7787
+ library: job.library,
7788
+ version: job.version,
7789
+ status: job.status,
7790
+ dbStatus: job.versionStatus,
7791
+ createdAt: job.createdAt.toISOString(),
7792
+ startedAt: job.startedAt?.toISOString() ?? null,
7793
+ finishedAt: job.finishedAt?.toISOString() ?? null,
7794
+ error: job.error?.message ?? null,
7795
+ progress: job.progressMaxPages && job.progressMaxPages > 0 ? {
7796
+ pages: job.progressPages || 0,
7797
+ totalPages: job.progressMaxPages,
7798
+ totalDiscovered: job.progress?.totalDiscovered || job.progressMaxPages
7799
+ } : void 0,
7800
+ updatedAt: job.updatedAt?.toISOString(),
7801
+ errorMessage: job.errorMessage ?? void 0
7802
+ };
7803
+ return { job: jobInfo };
7791
7804
  }
7792
7805
  }
7793
7806
  class ListJobsTool {
@@ -7805,45 +7818,28 @@ class ListJobsTool {
7805
7818
  * @returns A promise that resolves with the list of simplified job objects.
7806
7819
  */
7807
7820
  async execute(input) {
7808
- return analytics.trackTool(
7809
- "list_jobs",
7810
- async () => {
7811
- const jobs = await this.pipeline.getJobs(input.status);
7812
- const simplifiedJobs = jobs.map((job) => {
7813
- return {
7814
- id: job.id,
7815
- library: job.library,
7816
- version: job.version,
7817
- status: job.status,
7818
- dbStatus: job.versionStatus,
7819
- createdAt: job.createdAt.toISOString(),
7820
- startedAt: job.startedAt?.toISOString() ?? null,
7821
- finishedAt: job.finishedAt?.toISOString() ?? null,
7822
- error: job.error?.message ?? null,
7823
- progress: job.progressMaxPages && job.progressMaxPages > 0 ? {
7824
- pages: job.progressPages || 0,
7825
- totalPages: job.progressMaxPages,
7826
- totalDiscovered: job.progress?.totalDiscovered || job.progressMaxPages
7827
- } : void 0,
7828
- updatedAt: job.updatedAt?.toISOString(),
7829
- errorMessage: job.errorMessage ?? void 0
7830
- };
7831
- });
7832
- return { jobs: simplifiedJobs };
7833
- },
7834
- (result) => {
7835
- return {
7836
- jobCount: result.jobs.length,
7837
- statusCounts: result.jobs.reduce(
7838
- (acc, job) => {
7839
- acc[job.status] = (acc[job.status] || 0) + 1;
7840
- return acc;
7841
- },
7842
- {}
7843
- )
7844
- };
7845
- }
7846
- );
7821
+ const jobs = await this.pipeline.getJobs(input.status);
7822
+ const simplifiedJobs = jobs.map((job) => {
7823
+ return {
7824
+ id: job.id,
7825
+ library: job.library,
7826
+ version: job.version,
7827
+ status: job.status,
7828
+ dbStatus: job.versionStatus,
7829
+ createdAt: job.createdAt.toISOString(),
7830
+ startedAt: job.startedAt?.toISOString() ?? null,
7831
+ finishedAt: job.finishedAt?.toISOString() ?? null,
7832
+ error: job.error?.message ?? null,
7833
+ progress: job.progressMaxPages && job.progressMaxPages > 0 ? {
7834
+ pages: job.progressPages || 0,
7835
+ totalPages: job.progressMaxPages,
7836
+ totalDiscovered: job.progress?.totalDiscovered || job.progressMaxPages
7837
+ } : void 0,
7838
+ updatedAt: job.updatedAt?.toISOString(),
7839
+ errorMessage: job.errorMessage ?? void 0
7840
+ };
7841
+ });
7842
+ return { jobs: simplifiedJobs };
7847
7843
  }
7848
7844
  }
7849
7845
  class ListLibrariesTool {
@@ -7852,32 +7848,20 @@ class ListLibrariesTool {
7852
7848
  this.docService = docService;
7853
7849
  }
7854
7850
  async execute(_options) {
7855
- return analytics.trackTool(
7856
- "list_libraries",
7857
- async () => {
7858
- const rawLibraries = await this.docService.listLibraries();
7859
- const libraries = rawLibraries.map(({ library, versions }) => ({
7860
- name: library,
7861
- versions: versions.map((v) => ({
7862
- version: v.ref.version,
7863
- documentCount: v.counts.documents,
7864
- uniqueUrlCount: v.counts.uniqueUrls,
7865
- indexedAt: v.indexedAt,
7866
- status: v.status,
7867
- ...v.progress ? { progress: v.progress } : void 0,
7868
- sourceUrl: v.sourceUrl
7869
- }))
7870
- }));
7871
- return { libraries };
7872
- },
7873
- (result) => ({
7874
- libraryCount: result.libraries.length,
7875
- totalVersions: result.libraries.reduce(
7876
- (sum, lib) => sum + lib.versions.length,
7877
- 0
7878
- )
7879
- })
7880
- );
7851
+ const rawLibraries = await this.docService.listLibraries();
7852
+ const libraries = rawLibraries.map(({ library, versions }) => ({
7853
+ name: library,
7854
+ versions: versions.map((v) => ({
7855
+ version: v.ref.version,
7856
+ documentCount: v.counts.documents,
7857
+ uniqueUrlCount: v.counts.uniqueUrls,
7858
+ indexedAt: v.indexedAt,
7859
+ status: v.status,
7860
+ ...v.progress ? { progress: v.progress } : void 0,
7861
+ sourceUrl: v.sourceUrl
7862
+ }))
7863
+ }));
7864
+ return { libraries };
7881
7865
  }
7882
7866
  }
7883
7867
  class RemoveTool {
@@ -7891,42 +7875,29 @@ class RemoveTool {
7891
7875
  * Removes all documents, the version record, and the library if no other versions exist.
7892
7876
  */
7893
7877
  async execute(args) {
7894
- return analytics.trackTool(
7895
- "remove_docs",
7896
- async () => {
7897
- const { library, version: version2 } = args;
7898
- logger.info(`🗑️ Removing library: ${library}${version2 ? `@${version2}` : ""}`);
7899
- try {
7900
- const allJobs = await this.pipeline.getJobs();
7901
- const jobs = allJobs.filter(
7902
- (job) => job.library === library && job.version === (version2 ?? "") && (job.status === PipelineJobStatus.QUEUED || job.status === PipelineJobStatus.RUNNING)
7903
- );
7904
- for (const job of jobs) {
7905
- logger.info(
7906
- `🚫 Aborting job for ${library}@${version2 ?? ""} before deletion: ${job.id}`
7907
- );
7908
- await this.pipeline.cancelJob(job.id);
7909
- await this.pipeline.waitForJobCompletion(job.id);
7910
- }
7911
- await this.documentManagementService.removeVersion(library, version2);
7912
- const message = `Successfully removed ${library}${version2 ? `@${version2}` : ""}.`;
7913
- logger.info(`✅ ${message}`);
7914
- return { message };
7915
- } catch (error) {
7916
- const errorMessage = `Failed to remove ${library}${version2 ? `@${version2}` : ""}: ${error instanceof Error ? error.message : String(error)}`;
7917
- logger.error(`❌ Error removing library: ${errorMessage}`);
7918
- throw new ToolError(errorMessage, this.constructor.name);
7919
- }
7920
- },
7921
- () => {
7922
- const { library, version: version2 } = args;
7923
- return {
7924
- library,
7925
- version: version2
7926
- // Success is implicit since if this callback runs, no exception was thrown
7927
- };
7878
+ const { library, version: version2 } = args;
7879
+ logger.info(`🗑️ Removing library: ${library}${version2 ? `@${version2}` : ""}`);
7880
+ try {
7881
+ const allJobs = await this.pipeline.getJobs();
7882
+ const jobs = allJobs.filter(
7883
+ (job) => job.library === library && job.version === (version2 ?? "") && (job.status === PipelineJobStatus.QUEUED || job.status === PipelineJobStatus.RUNNING)
7884
+ );
7885
+ for (const job of jobs) {
7886
+ logger.info(
7887
+ `🚫 Aborting job for ${library}@${version2 ?? ""} before deletion: ${job.id}`
7888
+ );
7889
+ await this.pipeline.cancelJob(job.id);
7890
+ await this.pipeline.waitForJobCompletion(job.id);
7928
7891
  }
7929
- );
7892
+ await this.documentManagementService.removeVersion(library, version2);
7893
+ const message = `Successfully removed ${library}${version2 ? `@${version2}` : ""}.`;
7894
+ logger.info(`✅ ${message}`);
7895
+ return { message };
7896
+ } catch (error) {
7897
+ const errorMessage = `Failed to remove ${library}${version2 ? `@${version2}` : ""}: ${error instanceof Error ? error.message : String(error)}`;
7898
+ logger.error(`❌ Error removing library: ${errorMessage}`);
7899
+ throw new ToolError(errorMessage, this.constructor.name);
7900
+ }
7930
7901
  }
7931
7902
  }
7932
7903
  class ScrapeTool {
@@ -7942,80 +7913,66 @@ class ScrapeTool {
7942
7913
  options: scraperOptions,
7943
7914
  waitForCompletion = true
7944
7915
  } = options;
7945
- return analytics.trackTool(
7946
- "scrape_docs",
7947
- async () => {
7948
- let internalVersion;
7949
- const partialVersionRegex = /^\d+(\.\d+)?$/;
7950
- if (version2 === null || version2 === void 0) {
7951
- internalVersion = "";
7916
+ let internalVersion;
7917
+ const partialVersionRegex = /^\d+(\.\d+)?$/;
7918
+ if (version2 === null || version2 === void 0) {
7919
+ internalVersion = "";
7920
+ } else {
7921
+ const validFullVersion = semver.valid(version2);
7922
+ if (validFullVersion) {
7923
+ internalVersion = validFullVersion;
7924
+ } else if (partialVersionRegex.test(version2)) {
7925
+ const coercedVersion = semver.coerce(version2);
7926
+ if (coercedVersion) {
7927
+ internalVersion = coercedVersion.version;
7952
7928
  } else {
7953
- const validFullVersion = semver.valid(version2);
7954
- if (validFullVersion) {
7955
- internalVersion = validFullVersion;
7956
- } else if (partialVersionRegex.test(version2)) {
7957
- const coercedVersion = semver.coerce(version2);
7958
- if (coercedVersion) {
7959
- internalVersion = coercedVersion.version;
7960
- } else {
7961
- throw new Error(
7962
- `Invalid version format for scraping: '${version2}'. Use 'X.Y.Z', 'X.Y.Z-prerelease', 'X.Y', 'X', or omit.`
7963
- );
7964
- }
7965
- } else {
7966
- throw new Error(
7967
- `Invalid version format for scraping: '${version2}'. Use 'X.Y.Z', 'X.Y.Z-prerelease', 'X.Y', 'X', or omit.`
7968
- );
7969
- }
7970
- }
7971
- internalVersion = internalVersion.toLowerCase();
7972
- const pipeline = this.pipeline;
7973
- const enqueueVersion = internalVersion === "" ? null : internalVersion;
7974
- const jobId = await pipeline.enqueueJob(library, enqueueVersion, {
7975
- url,
7976
- library,
7977
- version: internalVersion,
7978
- scope: scraperOptions?.scope ?? "subpages",
7979
- followRedirects: scraperOptions?.followRedirects ?? true,
7980
- maxPages: scraperOptions?.maxPages ?? DEFAULT_MAX_PAGES,
7981
- maxDepth: scraperOptions?.maxDepth ?? DEFAULT_MAX_DEPTH$1,
7982
- maxConcurrency: scraperOptions?.maxConcurrency ?? DEFAULT_MAX_CONCURRENCY,
7983
- ignoreErrors: scraperOptions?.ignoreErrors ?? true,
7984
- scrapeMode: scraperOptions?.scrapeMode ?? ScrapeMode.Auto,
7985
- // Pass scrapeMode enum
7986
- includePatterns: scraperOptions?.includePatterns,
7987
- excludePatterns: scraperOptions?.excludePatterns,
7988
- headers: scraperOptions?.headers
7989
- // <-- propagate headers
7990
- });
7991
- if (waitForCompletion) {
7992
- try {
7993
- await pipeline.waitForJobCompletion(jobId);
7994
- const finalJob = await pipeline.getJob(jobId);
7995
- const finalPagesScraped = finalJob?.progress?.pagesScraped ?? 0;
7996
- logger.debug(
7997
- `Job ${jobId} finished with status ${finalJob?.status}. Pages scraped: ${finalPagesScraped}`
7998
- );
7999
- return {
8000
- pagesScraped: finalPagesScraped
8001
- };
8002
- } catch (error) {
8003
- logger.error(`❌ Job ${jobId} failed or was cancelled: ${error}`);
8004
- throw error;
8005
- }
7929
+ throw new Error(
7930
+ `Invalid version format for scraping: '${version2}'. Use 'X.Y.Z', 'X.Y.Z-prerelease', 'X.Y', 'X', or omit.`
7931
+ );
8006
7932
  }
8007
- return { jobId };
8008
- },
8009
- (result) => ({
8010
- library,
8011
- version: version2,
8012
- url,
8013
- waitForCompletion,
8014
- ...scraperOptions,
8015
- isBackgroundJob: "jobId" in result,
8016
- pagesScraped: "pagesScraped" in result ? result.pagesScraped : void 0
8017
- })
8018
- );
7933
+ } else {
7934
+ throw new Error(
7935
+ `Invalid version format for scraping: '${version2}'. Use 'X.Y.Z', 'X.Y.Z-prerelease', 'X.Y', 'X', or omit.`
7936
+ );
7937
+ }
7938
+ }
7939
+ internalVersion = internalVersion.toLowerCase();
7940
+ const pipeline = this.pipeline;
7941
+ const enqueueVersion = internalVersion === "" ? null : internalVersion;
7942
+ const jobId = await pipeline.enqueueJob(library, enqueueVersion, {
7943
+ url,
7944
+ library,
7945
+ version: internalVersion,
7946
+ scope: scraperOptions?.scope ?? "subpages",
7947
+ followRedirects: scraperOptions?.followRedirects ?? true,
7948
+ maxPages: scraperOptions?.maxPages ?? DEFAULT_MAX_PAGES,
7949
+ maxDepth: scraperOptions?.maxDepth ?? DEFAULT_MAX_DEPTH$1,
7950
+ maxConcurrency: scraperOptions?.maxConcurrency ?? DEFAULT_MAX_CONCURRENCY,
7951
+ ignoreErrors: scraperOptions?.ignoreErrors ?? true,
7952
+ scrapeMode: scraperOptions?.scrapeMode ?? ScrapeMode.Auto,
7953
+ // Pass scrapeMode enum
7954
+ includePatterns: scraperOptions?.includePatterns,
7955
+ excludePatterns: scraperOptions?.excludePatterns,
7956
+ headers: scraperOptions?.headers
7957
+ // <-- propagate headers
7958
+ });
7959
+ if (waitForCompletion) {
7960
+ try {
7961
+ await pipeline.waitForJobCompletion(jobId);
7962
+ const finalJob = await pipeline.getJob(jobId);
7963
+ const finalPagesScraped = finalJob?.progress?.pagesScraped ?? 0;
7964
+ logger.debug(
7965
+ `Job ${jobId} finished with status ${finalJob?.status}. Pages scraped: ${finalPagesScraped}`
7966
+ );
7967
+ return {
7968
+ pagesScraped: finalPagesScraped
7969
+ };
7970
+ } catch (error) {
7971
+ logger.error(`❌ Job ${jobId} failed or was cancelled: ${error}`);
7972
+ throw error;
7973
+ }
7974
+ }
7975
+ return { jobId };
8019
7976
  }
8020
7977
  }
8021
7978
  class SearchTool {
@@ -8025,56 +7982,43 @@ class SearchTool {
8025
7982
  }
8026
7983
  async execute(options) {
8027
7984
  const { library, version: version2, query, limit = 5, exactMatch = false } = options;
8028
- return analytics.trackTool(
8029
- "search_docs",
8030
- async () => {
8031
- if (exactMatch && (!version2 || version2 === "latest")) {
8032
- await this.docService.validateLibraryExists(library);
8033
- const allLibraries = await this.docService.listLibraries();
8034
- const libraryInfo = allLibraries.find((lib) => lib.library === library);
8035
- const detailedVersions = libraryInfo ? libraryInfo.versions.map((v) => ({
8036
- version: v.ref.version,
8037
- documentCount: v.counts.documents,
8038
- uniqueUrlCount: v.counts.uniqueUrls,
8039
- indexedAt: v.indexedAt
8040
- })) : [];
8041
- throw new VersionNotFoundError(library, version2 ?? "latest", detailedVersions);
8042
- }
8043
- const resolvedVersion = version2 || "latest";
8044
- logger.info(
8045
- `🔍 Searching ${library}@${resolvedVersion} for: ${query}${exactMatch ? " (exact match)" : ""}`
8046
- );
8047
- try {
8048
- await this.docService.validateLibraryExists(library);
8049
- let versionToSearch = resolvedVersion;
8050
- if (!exactMatch) {
8051
- const versionResult = await this.docService.findBestVersion(library, version2);
8052
- versionToSearch = versionResult.bestMatch;
8053
- }
8054
- const results = await this.docService.searchStore(
8055
- library,
8056
- versionToSearch,
8057
- query,
8058
- limit
8059
- );
8060
- logger.info(`✅ Found ${results.length} matching results`);
8061
- return { results };
8062
- } catch (error) {
8063
- logger.error(
8064
- `❌ Search failed: ${error instanceof Error ? error.message : "Unknown error"}`
8065
- );
8066
- throw error;
8067
- }
8068
- },
8069
- (result) => ({
7985
+ if (exactMatch && (!version2 || version2 === "latest")) {
7986
+ await this.docService.validateLibraryExists(library);
7987
+ const allLibraries = await this.docService.listLibraries();
7988
+ const libraryInfo = allLibraries.find((lib) => lib.library === library);
7989
+ const detailedVersions = libraryInfo ? libraryInfo.versions.map((v) => ({
7990
+ version: v.ref.version,
7991
+ documentCount: v.counts.documents,
7992
+ uniqueUrlCount: v.counts.uniqueUrls,
7993
+ indexedAt: v.indexedAt
7994
+ })) : [];
7995
+ throw new VersionNotFoundError(library, version2 ?? "latest", detailedVersions);
7996
+ }
7997
+ const resolvedVersion = version2 || "latest";
7998
+ logger.info(
7999
+ `🔍 Searching ${library}@${resolvedVersion} for: ${query}${exactMatch ? " (exact match)" : ""}`
8000
+ );
8001
+ try {
8002
+ await this.docService.validateLibraryExists(library);
8003
+ let versionToSearch = resolvedVersion;
8004
+ if (!exactMatch) {
8005
+ const versionResult = await this.docService.findBestVersion(library, version2);
8006
+ versionToSearch = versionResult.bestMatch;
8007
+ }
8008
+ const results = await this.docService.searchStore(
8070
8009
  library,
8071
- version: version2,
8010
+ versionToSearch,
8072
8011
  query,
8073
- limit,
8074
- exactMatch,
8075
- resultCount: result.results.length
8076
- })
8077
- );
8012
+ limit
8013
+ );
8014
+ logger.info(`✅ Found ${results.length} matching results`);
8015
+ return { results };
8016
+ } catch (error) {
8017
+ logger.error(
8018
+ `❌ Search failed: ${error instanceof Error ? error.message : "Unknown error"}`
8019
+ );
8020
+ throw error;
8021
+ }
8078
8022
  }
8079
8023
  }
8080
8024
  function createResponse(text) {
@@ -8133,6 +8077,17 @@ function createMcpServerInstance(tools, readOnly = false) {
8133
8077
  // requires internet access
8134
8078
  },
8135
8079
  async ({ url, library, version: version2, maxPages, maxDepth, scope, followRedirects }) => {
8080
+ analytics.track(TelemetryEvent.TOOL_USED, {
8081
+ tool: "scrape_docs",
8082
+ context: "mcp_server",
8083
+ library,
8084
+ version: version2,
8085
+ url: new URL(url).hostname,
8086
+ // Privacy-safe URL tracking
8087
+ maxPages,
8088
+ maxDepth,
8089
+ scope
8090
+ });
8136
8091
  try {
8137
8092
  const result = await tools.scrape.execute({
8138
8093
  url,
@@ -8177,6 +8132,15 @@ function createMcpServerInstance(tools, readOnly = false) {
8177
8132
  destructiveHint: false
8178
8133
  },
8179
8134
  async ({ library, version: version2, query, limit }) => {
8135
+ analytics.track(TelemetryEvent.TOOL_USED, {
8136
+ tool: "search_docs",
8137
+ context: "mcp_server",
8138
+ library,
8139
+ version: version2,
8140
+ query: query.substring(0, 100),
8141
+ // Truncate query for privacy
8142
+ limit
8143
+ });
8180
8144
  try {
8181
8145
  const result = await tools.search.execute({
8182
8146
  library,
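
The handler-level tracking keeps the captured properties privacy-reduced: URLs are cut down to their hostname and free-text queries are truncated to 100 characters. The helper below is hypothetical (the handlers inline these expressions), but shows the effect:

```js
// Properties are reduced before tracking (pattern from the handlers above).
function privacySafeProps(url, query) {
  return {
    url: new URL(url).hostname,        // hostname only, never the full URL
    query: query.substring(0, 100),    // first 100 characters only
  };
}

privacySafeProps("https://example.com/docs/deep/page?id=1", "how do I configure auth?");
// → { url: "example.com", query: "how do I configure auth?" }
```
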
@@ -8236,6 +8200,10 @@ ${r.content}
8236
8200
  destructiveHint: false
8237
8201
  },
8238
8202
  async () => {
8203
+ analytics.track(TelemetryEvent.TOOL_USED, {
8204
+ tool: "list_libraries",
8205
+ context: "mcp_server"
8206
+ });
8239
8207
  try {
8240
8208
  const result = await tools.listLibraries.execute();
8241
8209
  if (result.libraries.length === 0) {
@@ -8266,6 +8234,12 @@ ${result.libraries.map((lib) => `- ${lib.name}`).join("\n")}`
8266
8234
  destructiveHint: false
8267
8235
  },
8268
8236
  async ({ library, targetVersion }) => {
8237
+ analytics.track(TelemetryEvent.TOOL_USED, {
8238
+ tool: "find_version",
8239
+ context: "mcp_server",
8240
+ library,
8241
+ targetVersion
8242
+ });
8269
8243
  try {
8270
8244
  const message = await tools.findVersion.execute({
8271
8245
  library,
@@ -8295,6 +8269,11 @@ ${result.libraries.map((lib) => `- ${lib.name}`).join("\n")}`
8295
8269
  destructiveHint: false
8296
8270
  },
8297
8271
  async ({ status }) => {
8272
+ analytics.track(TelemetryEvent.TOOL_USED, {
8273
+ tool: "list_jobs",
8274
+ context: "mcp_server",
8275
+ status
8276
+ });
8298
8277
  try {
8299
8278
  const result = await tools.listJobs.execute({
8300
8279
  status
@@ -8333,6 +8312,11 @@ ${formattedJobs}` : "No jobs found."
8333
8312
  destructiveHint: false
8334
8313
  },
8335
8314
  async ({ jobId }) => {
8315
+ analytics.track(TelemetryEvent.TOOL_USED, {
8316
+ tool: "get_job_info",
8317
+ context: "mcp_server",
8318
+ jobId
8319
+ });
8336
8320
  try {
8337
8321
  const result = await tools.getJobInfo.execute({ jobId });
8338
8322
  if (!result.job) {
@@ -8367,6 +8351,11 @@ ${formattedJob}`);
8367
8351
  destructiveHint: true
8368
8352
  },
8369
8353
  async ({ jobId }) => {
8354
+ analytics.track(TelemetryEvent.TOOL_USED, {
8355
+ tool: "cancel_job",
8356
+ context: "mcp_server",
8357
+ jobId
8358
+ });
8370
8359
  try {
8371
8360
  const result = await tools.cancelJob.execute({ jobId });
8372
8361
  if (result.success) {
@@ -8392,6 +8381,12 @@ ${formattedJob}`);
8392
8381
  destructiveHint: true
8393
8382
  },
8394
8383
  async ({ library, version: version2 }) => {
8384
+ analytics.track(TelemetryEvent.TOOL_USED, {
8385
+ tool: "remove_docs",
8386
+ context: "mcp_server",
8387
+ library,
8388
+ version: version2
8389
+ });
8395
8390
  try {
8396
8391
  const result = await tools.remove.execute({ library, version: version2 });
8397
8392
  return createResponse(result.message);
@@ -8418,6 +8413,13 @@ ${formattedJob}`);
8418
8413
  // requires internet access
8419
8414
  },
8420
8415
  async ({ url, followRedirects }) => {
8416
+ analytics.track(TelemetryEvent.TOOL_USED, {
8417
+ tool: "fetch_url",
8418
+ context: "mcp_server",
8419
+ url: new URL(url).hostname,
8420
+ // Privacy-safe URL tracking
8421
+ followRedirects
8422
+ });
8421
8423
  try {
8422
8424
  const result = await tools.fetchUrl.execute({ url, followRedirects });
8423
8425
  return createResponse(result);
@@ -8677,6 +8679,18 @@ function createPipelineRouter(trpc) {
8677
8679
  input.version ?? null,
8678
8680
  input.options
8679
8681
  );
8682
+ analytics.track(TelemetryEvent.WEB_SCRAPE_STARTED, {
8683
+ library: input.library,
8684
+ version: input.version || void 0,
8685
+ url: input.options.url,
8686
+ scope: input.options.scope || "subpages",
8687
+ maxDepth: input.options.maxDepth || 3,
8688
+ maxPages: input.options.maxPages || 1e3,
8689
+ maxConcurrency: input.options.maxConcurrency,
8690
+ ignoreErrors: input.options.ignoreErrors,
8691
+ scrapeMode: input.options.scrapeMode,
8692
+ hasCustomHeaders: !!(input.options.headers && Object.keys(input.options.headers).length > 0)
8693
+ });
8680
8694
  return { jobId };
8681
8695
  }
8682
8696
  ),
@@ -8760,6 +8774,13 @@ function createDataRouter(trpc) {
8760
8774
  input.query,
8761
8775
  input.limit ?? 5
8762
8776
  );
8777
+ analytics.track(TelemetryEvent.WEB_SEARCH_PERFORMED, {
8778
+ library: input.library,
8779
+ version: input.version || void 0,
8780
+ queryLength: input.query.length,
8781
+ resultCount: results.length,
8782
+ limit: input.limit ?? 5
8783
+ });
8763
8784
  return results;
8764
8785
  }
8765
8786
  ),
@@ -10300,22 +10321,6 @@ async function registerWorkerService(pipeline) {
10300
10321
  logger.debug(
10301
10322
  `Job ${job.id} progress: ${progress.pagesScraped}/${progress.totalPages} pages`
10302
10323
  );
10303
- analytics.track(TelemetryEvent.PIPELINE_JOB_PROGRESS, {
10304
- jobId: job.id,
10305
- // Job IDs are already anonymous
10306
- library: job.library,
10307
- pagesScraped: progress.pagesScraped,
10308
- totalPages: progress.totalPages,
10309
- totalDiscovered: progress.totalDiscovered,
10310
- progressPercent: Math.round(progress.pagesScraped / progress.totalPages * 100),
10311
- currentDepth: progress.depth,
10312
- maxDepth: progress.maxDepth,
10313
- discoveryRatio: Math.round(
10314
- progress.totalDiscovered / progress.totalPages * 100
10315
- ),
10316
- // How much we discovered vs limited total
10317
- queueEfficiency: progress.totalPages > 0 ? Math.round(progress.pagesScraped / progress.totalPages * 100) : 0
10318
- });
10319
10324
  },
10320
10325
  onJobStatusChange: async (job) => {
10321
10326
  logger.debug(`Job ${job.id} status changed to: ${job.status}`);
@@ -11731,9 +11736,9 @@ class DocumentStore {
11731
11736
  return [...vector, ...new Array(this.dbDimension - vector.length).fill(0)];
11732
11737
  }
11733
11738
  /**
11734
- * Initialize the embeddings client using either provided config or environment variables.
11735
- * If no embedding config is provided (null), embeddings will not be initialized.
11736
- * This allows DocumentStore to be used without embeddings for operations that don't need them.
11739
+ * Initialize the embeddings client using the provided config.
11740
+ * If no embedding config is provided (null or undefined), embeddings will not be initialized.
11741
+ * This allows DocumentStore to be used without embeddings for FTS-only operations.
11737
11742
  *
11738
11743
  * Environment variables per provider:
11739
11744
  * - openai: OPENAI_API_KEY (and optionally OPENAI_API_BASE, OPENAI_ORG_ID)
@@ -11743,11 +11748,13 @@ class DocumentStore {
11743
11748
  * - microsoft: Azure OpenAI credentials (AZURE_OPENAI_API_*)
11744
11749
  */
11745
11750
  async initializeEmbeddings() {
11746
- if (this.embeddingConfig === null) {
11747
- logger.debug("Embedding initialization skipped (explicitly disabled)");
11751
+ if (this.embeddingConfig === null || this.embeddingConfig === void 0) {
11752
+ logger.debug(
11753
+ "Embedding initialization skipped (no config provided - FTS-only mode)"
11754
+ );
11748
11755
  return;
11749
11756
  }
11750
- const config = this.embeddingConfig || EmbeddingConfig.parseEmbeddingConfig();
11757
+ const config = this.embeddingConfig;
11751
11758
  if (!areCredentialsAvailable(config.provider)) {
11752
11759
  logger.warn(
11753
11760
  `⚠️ No credentials found for ${config.provider} embedding provider. Vector search is disabled.
@@ -12057,7 +12064,7 @@ class DocumentStore {
12057
12064
  `;
12058
12065
  return `${header}${doc.pageContent}`;
12059
12066
  });
12060
- const maxBatchChars = Number(process.env.DOCS_MCP_EMBEDDING_BATCH_CHARS) || EMBEDDING_BATCH_CHARS;
12067
+ const maxBatchChars = EMBEDDING_BATCH_CHARS;
12061
12068
  const rawEmbeddings = [];
12062
12069
  let currentBatch = [];
12063
12070
  let currentBatchSize = 0;
@@ -12533,14 +12540,13 @@ class DocumentManagementService {
12533
12540
  normalizeVersion(version2) {
12534
12541
  return (version2 ?? "").toLowerCase();
12535
12542
  }
12536
- constructor(embeddingConfig, pipelineConfig) {
12543
+ constructor(embeddingConfig, pipelineConfig, storePath) {
12537
12544
  let dbPath;
12538
12545
  let dbDir;
12539
- const envStorePath = process.env.DOCS_MCP_STORE_PATH;
12540
- if (envStorePath) {
12541
- dbDir = envStorePath;
12546
+ if (storePath) {
12547
+ dbDir = storePath;
12542
12548
  dbPath = path.join(dbDir, "documents.db");
12543
- logger.debug(`Using database directory from DOCS_MCP_STORE_PATH: ${dbDir}`);
12549
+ logger.debug(`Using database directory from storePath parameter: ${dbDir}`);
12544
12550
  } else {
12545
12551
  const projectRoot2 = getProjectRoot();
12546
12552
  const oldDbDir = path.join(projectRoot2, ".store");
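
DocumentManagementService now receives the store path as an explicit constructor argument instead of reading DOCS_MCP_STORE_PATH itself, and the factories pass it through. Condensed from the CLI wiring later in this diff:

```js
// Store path and embedding model now flow explicitly from parsed CLI options.
const embeddingConfig = resolveEmbeddingContext(options.embeddingModel);
const docService = await createLocalDocumentManagement(
  embeddingConfig,
  globalOptions.storePath,   // previously read from DOCS_MCP_STORE_PATH deep inside the service
);
```
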
@@ -12916,41 +12922,72 @@ async function createDocumentManagement(options = {}) {
12916
12922
  await client.initialize();
12917
12923
  return client;
12918
12924
  }
12919
- const service = new DocumentManagementService(options.embeddingConfig);
12925
+ const service = new DocumentManagementService(
12926
+ options.embeddingConfig,
12927
+ void 0,
12928
+ options.storePath
12929
+ );
12920
12930
  await service.initialize();
12921
12931
  return service;
12922
12932
  }
12923
- async function createLocalDocumentManagement(embeddingConfig) {
12924
- const service = new DocumentManagementService(embeddingConfig);
12933
+ async function createLocalDocumentManagement(embeddingConfig, storePath) {
12934
+ const service = new DocumentManagementService(embeddingConfig, void 0, storePath);
12925
12935
  await service.initialize();
12926
12936
  return service;
12927
12937
  }
12928
12938
  function createDefaultAction(program) {
12929
12939
  return program.addOption(
12930
- new Option("--protocol <protocol>", "Protocol for MCP server").choices(["auto", "stdio", "http"]).default("auto")
12940
+ new Option("--protocol <protocol>", "Protocol for MCP server").env("DOCS_MCP_PROTOCOL").default("auto").choices(["auto", "stdio", "http"])
12931
12941
  ).addOption(
12932
- new Option("--port <number>", "Port for the server").argParser((v) => {
12942
+ new Option("--port <number>", "Port for the server").env("DOCS_MCP_PORT").env("PORT").default(CLI_DEFAULTS.HTTP_PORT.toString()).argParser((v) => {
12933
12943
  const n = Number(v);
12934
12944
  if (!Number.isInteger(n) || n < 1 || n > 65535) {
12935
12945
  throw new Error("Port must be an integer between 1 and 65535");
12936
12946
  }
12937
12947
  return String(n);
12938
- }).default(CLI_DEFAULTS.HTTP_PORT.toString())
12948
+ })
12949
+ ).addOption(
12950
+ new Option("--host <host>", "Host to bind the server to").env("DOCS_MCP_HOST").env("HOST").default(CLI_DEFAULTS.HOST).argParser(validateHost)
12939
12951
  ).addOption(
12940
- new Option("--host <host>", "Host to bind the server to").argParser(validateHost).default(CLI_DEFAULTS.HOST)
12952
+ new Option(
12953
+ "--embedding-model <model>",
12954
+ "Embedding model configuration (e.g., 'openai:text-embedding-3-small')"
12955
+ ).env("DOCS_MCP_EMBEDDING_MODEL")
12941
12956
  ).option("--resume", "Resume interrupted jobs on startup", false).option("--no-resume", "Do not resume jobs on startup").option(
12942
12957
  "--read-only",
12943
12958
  "Run in read-only mode (only expose read tools, disable write/job tools)",
12944
12959
  false
12945
- ).option(
12946
- "--auth-enabled",
12947
- "Enable OAuth2/OIDC authentication for MCP endpoints",
12948
- false
12949
- ).option("--auth-issuer-url <url>", "Issuer/discovery URL for OAuth2/OIDC provider").option(
12950
- "--auth-audience <id>",
12951
- "JWT audience claim (identifies this protected resource)"
12960
+ ).addOption(
12961
+ new Option(
12962
+ "--auth-enabled",
12963
+ "Enable OAuth2/OIDC authentication for MCP endpoints"
12964
+ ).env("DOCS_MCP_AUTH_ENABLED").argParser((value) => {
12965
+ if (value === void 0) {
12966
+ return process.env.DOCS_MCP_AUTH_ENABLED === "true" || process.env.DOCS_MCP_AUTH_ENABLED === "1";
12967
+ }
12968
+ return value;
12969
+ }).default(false)
12970
+ ).addOption(
12971
+ new Option(
12972
+ "--auth-issuer-url <url>",
12973
+ "Issuer/discovery URL for OAuth2/OIDC provider"
12974
+ ).env("DOCS_MCP_AUTH_ISSUER_URL")
12975
+ ).addOption(
12976
+ new Option(
12977
+ "--auth-audience <id>",
12978
+ "JWT audience claim (identifies this protected resource)"
12979
+ ).env("DOCS_MCP_AUTH_AUDIENCE")
12952
12980
  ).action(
12953
12981
  async (options) => {
12982
+ await analytics.track(TelemetryEvent.CLI_COMMAND, {
12983
+ command: "default",
12984
+ protocol: options.protocol,
12985
+ port: options.port,
12986
+ host: options.host,
12987
+ resume: options.resume,
12988
+ readOnly: options.readOnly,
12989
+ authEnabled: !!options.authEnabled
12990
+ });
12954
12991
  const resolvedProtocol = resolveProtocol(options.protocol);
12955
12992
  if (resolvedProtocol === "stdio") {
12956
12993
  setLogLevel(LogLevel.ERROR);
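Several options in the hunk above switch from plain .option() calls to Commander's Option builder with .env(), so values can come from environment variables as well as flags. A small standalone sketch of that pattern follows; the option and env names match the diff, but the default value here is illustrative.

import { Command, Option } from "commander";

// In Commander, precedence is: explicit flag > environment variable > default.
const program = new Command();
program.addOption(
  new Option("--port <number>", "Port for the server")
    .env("DOCS_MCP_PORT")
    .default("6280") // illustrative default, not the package's CLI_DEFAULTS value
);
program.parse(process.argv);
console.log(program.opts().port); // DOCS_MCP_PORT is used when --port is absent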
@@ -12967,9 +13004,13 @@ function createDefaultAction(program) {
12967
13004
  validateAuthConfig(authConfig);
12968
13005
  warnHttpUsage(authConfig, port);
12969
13006
  }
13007
+ const globalOptions = program.parent?.opts() || {};
12970
13008
  ensurePlaywrightBrowsersInstalled();
12971
- const embeddingConfig = resolveEmbeddingContext();
12972
- const docService = await createLocalDocumentManagement(embeddingConfig);
13009
+ const embeddingConfig = resolveEmbeddingContext(options.embeddingModel);
13010
+ const docService = await createLocalDocumentManagement(
13011
+ embeddingConfig,
13012
+ globalOptions.storePath
13013
+ );
12973
13014
  const pipelineOptions = {
12974
13015
  recoverJobs: options.resume || false,
12975
13016
  // Use --resume flag for job recovery
@@ -13021,6 +13062,13 @@ function createDefaultAction(program) {
13021
13062
  );
13022
13063
  }
13023
13064
  async function fetchUrlAction(url, options) {
13065
+ await analytics.track(TelemetryEvent.CLI_COMMAND, {
13066
+ command: "fetch-url",
13067
+ url,
13068
+ scrapeMode: options.scrapeMode,
13069
+ followRedirects: options.followRedirects,
13070
+ hasHeaders: options.header.length > 0
13071
+ });
13024
13072
  const headers = parseHeaders(options.header);
13025
13073
  const fetchUrlTool = new FetchUrlTool(new HttpFetcher(), new FileFetcher());
13026
13074
  const content = await fetchUrlTool.execute({
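fetchUrlAction keeps collecting repeated --header flags and converting them with parseHeaders before fetching. parseHeaders itself is not shown in this hunk; the following is only a plausible sketch of what such a helper does, not the package's actual implementation.

// Illustrative: turn ["Authorization: Bearer x", "Accept: text/html"]
// into { Authorization: "Bearer x", Accept: "text/html" }.
function parseHeaders(headerArgs) {
  const headers = {};
  for (const entry of headerArgs) {
    const idx = entry.indexOf(":");
    if (idx === -1) continue; // ignore malformed entries in this sketch
    headers[entry.slice(0, idx).trim()] = entry.slice(idx + 1).trim();
  }
  return headers;
}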
@@ -13057,6 +13105,12 @@ function createFetchUrlCommand(program) {
13057
13105
  ).action(fetchUrlAction);
13058
13106
  }
13059
13107
  async function findVersionAction(library, options) {
13108
+ await analytics.track(TelemetryEvent.CLI_COMMAND, {
13109
+ command: "find-version",
13110
+ library,
13111
+ version: options.version,
13112
+ useServerUrl: !!options.serverUrl
13113
+ });
13060
13114
  const serverUrl = options.serverUrl;
13061
13115
  const docService = await createDocumentManagement({
13062
13116
  serverUrl,
@@ -13081,6 +13135,10 @@ function createFindVersionCommand(program) {
13081
13135
  ).action(findVersionAction);
13082
13136
  }
13083
13137
  async function listAction(options) {
13138
+ await analytics.track(TelemetryEvent.CLI_COMMAND, {
13139
+ command: "list",
13140
+ useServerUrl: !!options.serverUrl
13141
+ });
13084
13142
  const { serverUrl } = options;
13085
13143
  const docService = await createDocumentManagement({
13086
13144
  serverUrl,
@@ -13102,17 +13160,22 @@ function createListCommand(program) {
13102
13160
  }
13103
13161
  function createMcpCommand(program) {
13104
13162
  return program.command("mcp").description("Start MCP server only").addOption(
13105
- new Option("--protocol <protocol>", "Protocol for MCP server").choices(["auto", "stdio", "http"]).default(CLI_DEFAULTS.PROTOCOL)
13163
+ new Option("--protocol <protocol>", "Protocol for MCP server").env("DOCS_MCP_PROTOCOL").default(CLI_DEFAULTS.PROTOCOL).choices(["auto", "stdio", "http"])
13106
13164
  ).addOption(
13107
- new Option("--port <number>", "Port for the MCP server").argParser((v) => {
13165
+ new Option("--port <number>", "Port for the MCP server").env("DOCS_MCP_PORT").env("PORT").default(CLI_DEFAULTS.HTTP_PORT.toString()).argParser((v) => {
13108
13166
  const n = Number(v);
13109
13167
  if (!Number.isInteger(n) || n < 1 || n > 65535) {
13110
13168
  throw new Error("Port must be an integer between 1 and 65535");
13111
13169
  }
13112
13170
  return String(n);
13113
- }).default(CLI_DEFAULTS.HTTP_PORT.toString())
13171
+ })
13114
13172
  ).addOption(
13115
- new Option("--host <host>", "Host to bind the MCP server to").argParser(validateHost).default(CLI_DEFAULTS.HOST)
13173
+ new Option("--host <host>", "Host to bind the MCP server to").env("DOCS_MCP_HOST").env("HOST").default(CLI_DEFAULTS.HOST).argParser(validateHost)
13174
+ ).addOption(
13175
+ new Option(
13176
+ "--embedding-model <model>",
13177
+ "Embedding model configuration (e.g., 'openai:text-embedding-3-small')"
13178
+ ).env("DOCS_MCP_EMBEDDING_MODEL")
13116
13179
  ).option(
13117
13180
  "--server-url <url>",
13118
13181
  "URL of external pipeline worker RPC (e.g., http://localhost:6280/api)"
@@ -13120,15 +13183,37 @@ function createMcpCommand(program) {
13120
13183
  "--read-only",
13121
13184
  "Run in read-only mode (only expose read tools, disable write/job tools)",
13122
13185
  false
13123
- ).option(
13124
- "--auth-enabled",
13125
- "Enable OAuth2/OIDC authentication for MCP endpoints",
13126
- false
13127
- ).option("--auth-issuer-url <url>", "Issuer/discovery URL for OAuth2/OIDC provider").option(
13128
- "--auth-audience <id>",
13129
- "JWT audience claim (identifies this protected resource)"
13186
+ ).addOption(
13187
+ new Option(
13188
+ "--auth-enabled",
13189
+ "Enable OAuth2/OIDC authentication for MCP endpoints"
13190
+ ).env("DOCS_MCP_AUTH_ENABLED").argParser((value) => {
13191
+ if (value === void 0) {
13192
+ return process.env.DOCS_MCP_AUTH_ENABLED === "true" || process.env.DOCS_MCP_AUTH_ENABLED === "1";
13193
+ }
13194
+ return value;
13195
+ }).default(false)
13196
+ ).addOption(
13197
+ new Option(
13198
+ "--auth-issuer-url <url>",
13199
+ "Issuer/discovery URL for OAuth2/OIDC provider"
13200
+ ).env("DOCS_MCP_AUTH_ISSUER_URL")
13201
+ ).addOption(
13202
+ new Option(
13203
+ "--auth-audience <id>",
13204
+ "JWT audience claim (identifies this protected resource)"
13205
+ ).env("DOCS_MCP_AUTH_AUDIENCE")
13130
13206
  ).action(
13131
13207
  async (cmdOptions) => {
13208
+ await analytics.track(TelemetryEvent.CLI_COMMAND, {
13209
+ command: "mcp",
13210
+ protocol: cmdOptions.protocol,
13211
+ port: cmdOptions.port,
13212
+ host: cmdOptions.host,
13213
+ useServerUrl: !!cmdOptions.serverUrl,
13214
+ readOnly: cmdOptions.readOnly,
13215
+ authEnabled: !!cmdOptions.authEnabled
13216
+ });
13132
13217
  const port = validatePort(cmdOptions.port);
13133
13218
  const host = validateHost(cmdOptions.host);
13134
13219
  const serverUrl = cmdOptions.serverUrl;
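The --auth-enabled option added above is a boolean flag, so its environment fallback is parsed by hand: DOCS_MCP_AUTH_ENABLED counts as enabled only when it is "true" or "1". A tiny sketch of that coercion follows; the helper name is illustrative.

// Illustrative helper matching the manual fallback in the argParser above.
function envFlagEnabled(name, env = process.env) {
  return env[name] === "true" || env[name] === "1";
}

// envFlagEnabled("DOCS_MCP_AUTH_ENABLED", { DOCS_MCP_AUTH_ENABLED: "1" }) === true
// envFlagEnabled("DOCS_MCP_AUTH_ENABLED", {}) === false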
@@ -13144,8 +13229,9 @@ function createMcpCommand(program) {
13144
13229
  if (authConfig) {
13145
13230
  validateAuthConfig(authConfig);
13146
13231
  }
13232
+ const globalOptions = program.parent?.opts() || {};
13147
13233
  try {
13148
- const embeddingConfig = resolveEmbeddingContext();
13234
+ const embeddingConfig = resolveEmbeddingContext(cmdOptions.embeddingModel);
13149
13235
  if (!serverUrl && !embeddingConfig) {
13150
13236
  logger.error(
13151
13237
  "❌ Embedding configuration is required for local mode. Configure an embedding provider with CLI options or environment variables."
@@ -13154,7 +13240,8 @@ function createMcpCommand(program) {
13154
13240
  }
13155
13241
  const docService = await createDocumentManagement({
13156
13242
  serverUrl,
13157
- embeddingConfig
13243
+ embeddingConfig,
13244
+ storePath: globalOptions.storePath
13158
13245
  });
13159
13246
  const pipelineOptions = {
13160
13247
  recoverJobs: false,
@@ -13216,6 +13303,12 @@ function createMcpCommand(program) {
13216
13303
  );
13217
13304
  }
13218
13305
  async function removeAction(library, options) {
13306
+ await analytics.track(TelemetryEvent.CLI_COMMAND, {
13307
+ command: "remove",
13308
+ library,
13309
+ version: options.version,
13310
+ useServerUrl: !!options.serverUrl
13311
+ });
13219
13312
  const serverUrl = options.serverUrl;
13220
13313
  const docService = await createDocumentManagement({
13221
13314
  serverUrl,
@@ -13244,9 +13337,26 @@ function createRemoveCommand(program) {
13244
13337
  "URL of external pipeline worker RPC (e.g., http://localhost:6280/api)"
13245
13338
  ).action(removeAction);
13246
13339
  }
13247
- async function scrapeAction(library, url, options) {
13340
+ async function scrapeAction(library, url, options, command) {
13341
+ await analytics.track(TelemetryEvent.CLI_COMMAND, {
13342
+ command: "scrape",
13343
+ library,
13344
+ version: options.version,
13345
+ url,
13346
+ maxPages: Number.parseInt(options.maxPages, 10),
13347
+ maxDepth: Number.parseInt(options.maxDepth, 10),
13348
+ maxConcurrency: Number.parseInt(options.maxConcurrency, 10),
13349
+ scope: options.scope,
13350
+ scrapeMode: options.scrapeMode,
13351
+ followRedirects: options.followRedirects,
13352
+ hasHeaders: options.header.length > 0,
13353
+ hasIncludePatterns: options.includePattern.length > 0,
13354
+ hasExcludePatterns: options.excludePattern.length > 0,
13355
+ useServerUrl: !!options.serverUrl
13356
+ });
13248
13357
  const serverUrl = options.serverUrl;
13249
- const embeddingConfig = resolveEmbeddingContext();
13358
+ const globalOptions = command?.parent?.opts() || {};
13359
+ const embeddingConfig = resolveEmbeddingContext(options.embeddingModel);
13250
13360
  if (!serverUrl && !embeddingConfig) {
13251
13361
  throw new Error(
13252
13362
  "Embedding configuration is required for local scraping. Please set DOCS_MCP_EMBEDDING_MODEL environment variable or use --server-url for remote execution."
@@ -13254,7 +13364,8 @@ async function scrapeAction(library, url, options) {
13254
13364
  }
13255
13365
  const docService = await createDocumentManagement({
13256
13366
  serverUrl,
13257
- embeddingConfig
13367
+ embeddingConfig,
13368
+ storePath: globalOptions.storePath
13258
13369
  });
13259
13370
  let pipeline = null;
13260
13371
  try {
@@ -13356,14 +13467,28 @@ function createScrapeCommand(program) {
13356
13467
  "Custom HTTP header to send with each request (can be specified multiple times)",
13357
13468
  (val, prev = []) => prev.concat([val]),
13358
13469
  []
13470
+ ).addOption(
13471
+ new Option(
13472
+ "--embedding-model <model>",
13473
+ "Embedding model configuration (e.g., 'openai:text-embedding-3-small')"
13474
+ ).env("DOCS_MCP_EMBEDDING_MODEL")
13359
13475
  ).option(
13360
13476
  "--server-url <url>",
13361
13477
  "URL of external pipeline worker RPC (e.g., http://localhost:6280/api)"
13362
13478
  ).action(scrapeAction);
13363
13479
  }
13364
13480
  async function searchAction(library, query, options) {
13481
+ await analytics.track(TelemetryEvent.CLI_COMMAND, {
13482
+ command: "search",
13483
+ library,
13484
+ version: options.version,
13485
+ query,
13486
+ limit: Number.parseInt(options.limit, 10),
13487
+ exactMatch: options.exactMatch,
13488
+ useServerUrl: !!options.serverUrl
13489
+ });
13365
13490
  const serverUrl = options.serverUrl;
13366
- const embeddingConfig = resolveEmbeddingContext();
13491
+ const embeddingConfig = resolveEmbeddingContext(options.embeddingModel);
13367
13492
  if (!serverUrl && !embeddingConfig) {
13368
13493
  throw new Error(
13369
13494
  "Embedding configuration is required for local search. Please set DOCS_MCP_EMBEDDING_MODEL environment variable or use --server-url for remote execution."
@@ -13393,139 +13518,185 @@ function createSearchCommand(program) {
13393
13518
  ).option(
13394
13519
  "-v, --version <string>",
13395
13520
  "Version of the library (optional, supports ranges)"
13396
- ).option("-l, --limit <number>", "Maximum number of results", "5").option("-e, --exact-match", "Only use exact version match (default: false)", false).option(
13521
+ ).option("-l, --limit <number>", "Maximum number of results", "5").option("-e, --exact-match", "Only use exact version match (default: false)", false).addOption(
13522
+ new Option(
13523
+ "--embedding-model <model>",
13524
+ "Embedding model configuration (e.g., 'openai:text-embedding-3-small')"
13525
+ ).env("DOCS_MCP_EMBEDDING_MODEL")
13526
+ ).option(
13397
13527
  "--server-url <url>",
13398
13528
  "URL of external pipeline worker RPC (e.g., http://localhost:6280/api)"
13399
13529
  ).action(searchAction);
13400
13530
  }
13401
13531
  function createWebCommand(program) {
13402
13532
  return program.command("web").description("Start web interface only").addOption(
13403
- new Option("--port <number>", "Port for the web interface").argParser((v) => {
13533
+ new Option("--port <number>", "Port for the web interface").env("DOCS_MCP_WEB_PORT").env("DOCS_MCP_PORT").env("PORT").default(CLI_DEFAULTS.WEB_PORT.toString()).argParser((v) => {
13404
13534
  const n = Number(v);
13405
13535
  if (!Number.isInteger(n) || n < 1 || n > 65535) {
13406
13536
  throw new Error("Port must be an integer between 1 and 65535");
13407
13537
  }
13408
13538
  return String(n);
13409
- }).default(CLI_DEFAULTS.WEB_PORT.toString())
13539
+ })
13540
+ ).addOption(
13541
+ new Option("--host <host>", "Host to bind the web interface to").env("DOCS_MCP_HOST").env("HOST").default(CLI_DEFAULTS.HOST).argParser(validateHost)
13410
13542
  ).addOption(
13411
- new Option("--host <host>", "Host to bind the web interface to").argParser(validateHost).default(CLI_DEFAULTS.HOST)
13543
+ new Option(
13544
+ "--embedding-model <model>",
13545
+ "Embedding model configuration (e.g., 'openai:text-embedding-3-small')"
13546
+ ).env("DOCS_MCP_EMBEDDING_MODEL")
13412
13547
  ).option(
13413
13548
  "--server-url <url>",
13414
13549
  "URL of external pipeline worker RPC (e.g., http://localhost:6280/api)"
13415
- ).action(async (cmdOptions) => {
13416
- const port = validatePort(cmdOptions.port);
13417
- const host = validateHost(cmdOptions.host);
13418
- const serverUrl = cmdOptions.serverUrl;
13419
- try {
13420
- const embeddingConfig = resolveEmbeddingContext();
13421
- if (!serverUrl && !embeddingConfig) {
13422
- logger.error(
13423
- "❌ Embedding configuration is required for local mode. Configure an embedding provider with CLI options or environment variables."
13550
+ ).action(
13551
+ async (cmdOptions) => {
13552
+ await analytics.track(TelemetryEvent.CLI_COMMAND, {
13553
+ command: "web",
13554
+ port: cmdOptions.port,
13555
+ host: cmdOptions.host,
13556
+ useServerUrl: !!cmdOptions.serverUrl
13557
+ });
13558
+ const port = validatePort(cmdOptions.port);
13559
+ const host = validateHost(cmdOptions.host);
13560
+ const serverUrl = cmdOptions.serverUrl;
13561
+ try {
13562
+ const embeddingConfig = resolveEmbeddingContext(cmdOptions.embeddingModel);
13563
+ if (!serverUrl && !embeddingConfig) {
13564
+ logger.error(
13565
+ "❌ Embedding configuration is required for local mode. Configure an embedding provider with CLI options or environment variables."
13566
+ );
13567
+ process.exit(1);
13568
+ }
13569
+ const docService = await createDocumentManagement({
13570
+ serverUrl,
13571
+ embeddingConfig
13572
+ });
13573
+ const pipelineOptions = {
13574
+ recoverJobs: false,
13575
+ // Web command doesn't support job recovery
13576
+ serverUrl,
13577
+ concurrency: 3
13578
+ };
13579
+ const pipeline = await createPipelineWithCallbacks(
13580
+ serverUrl ? void 0 : docService,
13581
+ pipelineOptions
13424
13582
  );
13583
+ const config = createAppServerConfig({
13584
+ enableWebInterface: true,
13585
+ enableMcpServer: false,
13586
+ enableApiServer: false,
13587
+ enableWorker: !serverUrl,
13588
+ port,
13589
+ host,
13590
+ externalWorkerUrl: serverUrl,
13591
+ startupContext: {
13592
+ cliCommand: "web"
13593
+ }
13594
+ });
13595
+ logger.info(
13596
+ `🚀 Starting web interface${serverUrl ? ` connecting to worker at ${serverUrl}` : ""}`
13597
+ );
13598
+ const appServer = await startAppServer(docService, pipeline, config);
13599
+ registerGlobalServices({
13600
+ appServer,
13601
+ docService
13602
+ // pipeline is owned by AppServer - don't register globally to avoid double shutdown
13603
+ });
13604
+ await new Promise(() => {
13605
+ });
13606
+ } catch (error) {
13607
+ logger.error(`❌ Failed to start web interface: ${error}`);
13425
13608
  process.exit(1);
13426
13609
  }
13427
- const docService = await createDocumentManagement({
13428
- serverUrl,
13429
- embeddingConfig
13430
- });
13431
- const pipelineOptions = {
13432
- recoverJobs: false,
13433
- // Web command doesn't support job recovery
13434
- serverUrl,
13435
- concurrency: 3
13436
- };
13437
- const pipeline = await createPipelineWithCallbacks(
13438
- serverUrl ? void 0 : docService,
13439
- pipelineOptions
13440
- );
13441
- const config = createAppServerConfig({
13442
- enableWebInterface: true,
13443
- enableMcpServer: false,
13444
- enableApiServer: false,
13445
- enableWorker: !serverUrl,
13446
- port,
13447
- host,
13448
- externalWorkerUrl: serverUrl,
13449
- startupContext: {
13450
- cliCommand: "web"
13451
- }
13452
- });
13453
- logger.info(
13454
- `🚀 Starting web interface${serverUrl ? ` connecting to worker at ${serverUrl}` : ""}`
13455
- );
13456
- const appServer = await startAppServer(docService, pipeline, config);
13457
- registerGlobalServices({
13458
- appServer,
13459
- docService
13460
- // pipeline is owned by AppServer - don't register globally to avoid double shutdown
13461
- });
13462
- await new Promise(() => {
13463
- });
13464
- } catch (error) {
13465
- logger.error(`❌ Failed to start web interface: ${error}`);
13466
- process.exit(1);
13467
13610
  }
13468
- });
13611
+ );
13469
13612
  }
13470
13613
  function createWorkerCommand(program) {
13471
13614
  return program.command("worker").description("Start external pipeline worker (HTTP API)").addOption(
13472
- new Option("--port <number>", "Port for worker API").argParser((v) => {
13615
+ new Option("--port <number>", "Port for worker API").env("DOCS_MCP_PORT").env("PORT").default("8080").argParser((v) => {
13473
13616
  const n = Number(v);
13474
13617
  if (!Number.isInteger(n) || n < 1 || n > 65535) {
13475
13618
  throw new Error("Port must be an integer between 1 and 65535");
13476
13619
  }
13477
13620
  return String(n);
13478
- }).default("8080")
13621
+ })
13479
13622
  ).addOption(
13480
- new Option("--host <host>", "Host to bind the worker API to").argParser(validateHost).default(CLI_DEFAULTS.HOST)
13481
- ).option("--resume", "Resume interrupted jobs on startup", true).option("--no-resume", "Do not resume jobs on startup").action(async (cmdOptions) => {
13482
- const port = validatePort(cmdOptions.port);
13483
- const host = validateHost(cmdOptions.host);
13484
- try {
13485
- logger.info(`🚀 Starting external pipeline worker on port ${port}`);
13486
- ensurePlaywrightBrowsersInstalled();
13487
- const embeddingConfig = resolveEmbeddingContext();
13488
- const docService = await createLocalDocumentManagement(embeddingConfig);
13489
- const pipelineOptions = {
13490
- recoverJobs: cmdOptions.resume,
13491
- // Use the resume option
13492
- concurrency: CLI_DEFAULTS.MAX_CONCURRENCY
13493
- };
13494
- const pipeline = await createPipelineWithCallbacks(docService, pipelineOptions);
13495
- const config = createAppServerConfig({
13496
- enableWebInterface: false,
13497
- enableMcpServer: false,
13498
- enableApiServer: true,
13499
- enableWorker: true,
13500
- port,
13501
- host,
13502
- startupContext: {
13503
- cliCommand: "worker"
13504
- }
13505
- });
13506
- const appServer = await startAppServer(docService, pipeline, config);
13507
- registerGlobalServices({
13508
- appServer,
13509
- docService
13510
- // pipeline is owned by AppServer - don't register globally to avoid double shutdown
13511
- });
13512
- await new Promise(() => {
13623
+ new Option("--host <host>", "Host to bind the worker API to").env("DOCS_MCP_HOST").env("HOST").default(CLI_DEFAULTS.HOST).argParser(validateHost)
13624
+ ).addOption(
13625
+ new Option(
13626
+ "--embedding-model <model>",
13627
+ "Embedding model configuration (e.g., 'openai:text-embedding-3-small')"
13628
+ ).env("DOCS_MCP_EMBEDDING_MODEL")
13629
+ ).option("--resume", "Resume interrupted jobs on startup", true).option("--no-resume", "Do not resume jobs on startup").action(
13630
+ async (cmdOptions) => {
13631
+ await analytics.track(TelemetryEvent.CLI_COMMAND, {
13632
+ command: "worker",
13633
+ port: cmdOptions.port,
13634
+ host: cmdOptions.host,
13635
+ resume: cmdOptions.resume
13513
13636
  });
13514
- } catch (error) {
13515
- logger.error(`❌ Failed to start external pipeline worker: ${error}`);
13516
- process.exit(1);
13637
+ const port = validatePort(cmdOptions.port);
13638
+ const host = validateHost(cmdOptions.host);
13639
+ try {
13640
+ logger.info(`🚀 Starting external pipeline worker on port ${port}`);
13641
+ ensurePlaywrightBrowsersInstalled();
13642
+ const embeddingConfig = resolveEmbeddingContext(cmdOptions.embeddingModel);
13643
+ const docService = await createLocalDocumentManagement(embeddingConfig);
13644
+ const pipelineOptions = {
13645
+ recoverJobs: cmdOptions.resume,
13646
+ // Use the resume option
13647
+ concurrency: CLI_DEFAULTS.MAX_CONCURRENCY
13648
+ };
13649
+ const pipeline = await createPipelineWithCallbacks(docService, pipelineOptions);
13650
+ const config = createAppServerConfig({
13651
+ enableWebInterface: false,
13652
+ enableMcpServer: false,
13653
+ enableApiServer: true,
13654
+ enableWorker: true,
13655
+ port,
13656
+ host,
13657
+ startupContext: {
13658
+ cliCommand: "worker"
13659
+ }
13660
+ });
13661
+ const appServer = await startAppServer(docService, pipeline, config);
13662
+ registerGlobalServices({
13663
+ appServer,
13664
+ docService
13665
+ // pipeline is owned by AppServer - don't register globally to avoid double shutdown
13666
+ });
13667
+ await new Promise(() => {
13668
+ });
13669
+ } catch (error) {
13670
+ logger.error(`❌ Failed to start external pipeline worker: ${error}`);
13671
+ process.exit(1);
13672
+ }
13517
13673
  }
13518
- });
13674
+ );
13519
13675
  }
13520
13676
  function createCliProgram() {
13521
13677
  const program = new Command();
13522
13678
  const commandStartTimes = /* @__PURE__ */ new Map();
13523
13679
  program.name("docs-mcp-server").description("Unified CLI, MCP Server, and Web Interface for Docs MCP Server.").version(packageJson.version).addOption(
13524
13680
  new Option("--verbose", "Enable verbose (debug) logging").conflicts("silent")
13525
- ).addOption(new Option("--silent", "Disable all logging except errors")).addOption(new Option("--no-telemetry", "Disable telemetry collection")).enablePositionalOptions().allowExcessArguments(false).showHelpAfterError(true);
13681
+ ).addOption(new Option("--silent", "Disable all logging except errors")).addOption(
13682
+ new Option("--telemetry", "Enable telemetry collection").env("DOCS_MCP_TELEMETRY").argParser((value) => {
13683
+ if (value === void 0) {
13684
+ return process.env.DOCS_MCP_TELEMETRY !== "false" && process.env.DOCS_MCP_TELEMETRY !== "0";
13685
+ }
13686
+ return value;
13687
+ }).default(true)
13688
+ ).addOption(new Option("--no-telemetry", "Disable telemetry collection")).addOption(
13689
+ new Option("--store-path <path>", "Custom path for data storage directory").env(
13690
+ "DOCS_MCP_STORE_PATH"
13691
+ )
13692
+ ).enablePositionalOptions().allowExcessArguments(false).showHelpAfterError(true);
13526
13693
  program.hook("preAction", async (thisCommand, actionCommand) => {
13527
13694
  const globalOptions = thisCommand.opts();
13528
13695
  setupLogging(globalOptions);
13696
+ initTelemetry({
13697
+ enabled: globalOptions.telemetry ?? true,
13698
+ storePath: globalOptions.storePath
13699
+ });
13529
13700
  if (shouldEnableTelemetry()) {
13530
13701
  if (analytics.isEnabled()) {
13531
13702
  analytics.setGlobalContext({
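The global options hunk above replaces the single --no-telemetry flag with a --telemetry/--no-telemetry pair bound to DOCS_MCP_TELEMETRY, and the preAction hook now passes the result into initTelemetry. A hedged sketch of the resulting opt-out resolution follows, assuming the priority explicit flag, then environment variable, then enabled by default; the "false"/"0" checks mirror the argParser shown above.

// Illustrative resolution of the telemetry setting.
function resolveTelemetryEnabled(flagValue, env = process.env) {
  if (typeof flagValue === "boolean") return flagValue; // --telemetry / --no-telemetry
  return env.DOCS_MCP_TELEMETRY !== "false" && env.DOCS_MCP_TELEMETRY !== "0";
}

// resolveTelemetryEnabled(undefined, { DOCS_MCP_TELEMETRY: "0" }) === false
// resolveTelemetryEnabled(true, { DOCS_MCP_TELEMETRY: "0" })      === true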
@@ -13539,8 +13710,6 @@ function createCliProgram() {
13539
13710
  commandStartTimes.set(commandKey, Date.now());
13540
13711
  actionCommand._trackingKey = commandKey;
13541
13712
  }
13542
- } else {
13543
- TelemetryConfig.getInstance().disable();
13544
13713
  }
13545
13714
  });
13546
13715
  program.hook("postAction", async (_thisCommand, actionCommand) => {