@arabold/docs-mcp-server 1.25.0 → 1.25.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +34 -0
- package/dist/index.js +770 -694
- package/dist/index.js.map +1 -1
- package/package.json +1 -1
package/dist/index.js
CHANGED
|
@@ -541,32 +541,15 @@ class PostHogClient {
|
|
|
541
541
|
}
|
|
542
542
|
class TelemetryConfig {
|
|
543
543
|
static instance;
|
|
544
|
-
enabled;
|
|
544
|
+
enabled = true;
|
|
545
|
+
// Default to enabled
|
|
545
546
|
constructor() {
|
|
546
|
-
this.enabled = this.determineEnabledState();
|
|
547
|
-
}
|
|
548
|
-
/**
|
|
549
|
-
* Determines if telemetry should be enabled based on CLI flags and environment variables.
|
|
550
|
-
* Priority: CLI flags > environment variables > default (true)
|
|
551
|
-
*/
|
|
552
|
-
determineEnabledState() {
|
|
553
|
-
if (process.env.DOCS_MCP_TELEMETRY === "false") {
|
|
554
|
-
return false;
|
|
555
|
-
}
|
|
556
|
-
const args = process.argv;
|
|
557
|
-
if (args.includes("--no-telemetry")) {
|
|
558
|
-
return false;
|
|
559
|
-
}
|
|
560
|
-
return true;
|
|
561
547
|
}
|
|
562
548
|
isEnabled() {
|
|
563
549
|
return this.enabled;
|
|
564
550
|
}
|
|
565
|
-
|
|
566
|
-
this.enabled =
|
|
567
|
-
}
|
|
568
|
-
enable() {
|
|
569
|
-
this.enabled = true;
|
|
551
|
+
setEnabled(enabled) {
|
|
552
|
+
this.enabled = enabled;
|
|
570
553
|
}
|
|
571
554
|
static getInstance() {
|
|
572
555
|
if (!TelemetryConfig.instance) {
|
|
@@ -575,10 +558,9 @@ class TelemetryConfig {
|
|
|
575
558
|
return TelemetryConfig.instance;
|
|
576
559
|
}
|
|
577
560
|
}
|
|
578
|
-
function generateInstallationId() {
|
|
561
|
+
function generateInstallationId(storePath) {
|
|
579
562
|
try {
|
|
580
|
-
const
|
|
581
|
-
const dataDir = envStorePath || envPaths("docs-mcp-server", { suffix: "" }).data;
|
|
563
|
+
const dataDir = storePath || envPaths("docs-mcp-server", { suffix: "" }).data;
|
|
582
564
|
const installationIdPath = path.join(dataDir, "installation.id");
|
|
583
565
|
if (fs.existsSync(installationIdPath)) {
|
|
584
566
|
const existingId = fs.readFileSync(installationIdPath, "utf8").trim();
|
|
@@ -602,10 +584,10 @@ var TelemetryEvent = /* @__PURE__ */ ((TelemetryEvent2) => {
|
|
|
602
584
|
TelemetryEvent2["APP_SHUTDOWN"] = "app_shutdown";
|
|
603
585
|
TelemetryEvent2["CLI_COMMAND"] = "cli_command";
|
|
604
586
|
TelemetryEvent2["TOOL_USED"] = "tool_used";
|
|
605
|
-
TelemetryEvent2["HTTP_REQUEST_COMPLETED"] = "http_request_completed";
|
|
606
|
-
TelemetryEvent2["PIPELINE_JOB_PROGRESS"] = "pipeline_job_progress";
|
|
607
587
|
TelemetryEvent2["PIPELINE_JOB_COMPLETED"] = "pipeline_job_completed";
|
|
608
588
|
TelemetryEvent2["DOCUMENT_PROCESSED"] = "document_processed";
|
|
589
|
+
TelemetryEvent2["WEB_SEARCH_PERFORMED"] = "web_search_performed";
|
|
590
|
+
TelemetryEvent2["WEB_SCRAPE_STARTED"] = "web_scrape_started";
|
|
609
591
|
return TelemetryEvent2;
|
|
610
592
|
})(TelemetryEvent || {});
|
|
611
593
|
class Analytics {
|
|
@@ -623,6 +605,8 @@ class Analytics {
|
|
|
623
605
|
const analytics2 = new Analytics(shouldEnable);
|
|
624
606
|
if (analytics2.isEnabled()) {
|
|
625
607
|
logger.debug("Analytics enabled");
|
|
608
|
+
} else if (!config.isEnabled()) {
|
|
609
|
+
logger.debug("Analytics disabled (user preference)");
|
|
626
610
|
} else {
|
|
627
611
|
logger.debug("Analytics disabled");
|
|
628
612
|
}
|
|
@@ -682,38 +666,29 @@ class Analytics {
|
|
|
682
666
|
isEnabled() {
|
|
683
667
|
return this.enabled;
|
|
684
668
|
}
|
|
685
|
-
|
|
686
|
-
|
|
687
|
-
|
|
688
|
-
|
|
689
|
-
|
|
690
|
-
try {
|
|
691
|
-
const result = await operation();
|
|
692
|
-
this.track("tool_used", {
|
|
693
|
-
tool: toolName,
|
|
694
|
-
success: true,
|
|
695
|
-
durationMs: Date.now() - startTime,
|
|
696
|
-
...getProperties ? getProperties(result) : {}
|
|
697
|
-
});
|
|
698
|
-
return result;
|
|
699
|
-
} catch (error) {
|
|
700
|
-
this.track("tool_used", {
|
|
701
|
-
tool: toolName,
|
|
702
|
-
success: false,
|
|
703
|
-
durationMs: Date.now() - startTime
|
|
704
|
-
});
|
|
705
|
-
if (error instanceof Error) {
|
|
706
|
-
this.captureException(error, {
|
|
707
|
-
tool: toolName,
|
|
708
|
-
context: "tool_execution",
|
|
709
|
-
durationMs: Date.now() - startTime
|
|
710
|
-
});
|
|
711
|
-
}
|
|
712
|
-
throw error;
|
|
713
|
-
}
|
|
669
|
+
}
|
|
670
|
+
let analyticsInstance = null;
|
|
671
|
+
function getAnalytics() {
|
|
672
|
+
if (!analyticsInstance) {
|
|
673
|
+
analyticsInstance = Analytics.create();
|
|
714
674
|
}
|
|
675
|
+
return analyticsInstance;
|
|
715
676
|
}
|
|
716
|
-
|
|
677
|
+
function initTelemetry(options) {
|
|
678
|
+
TelemetryConfig.getInstance().setEnabled(options.enabled);
|
|
679
|
+
generateInstallationId(options.storePath);
|
|
680
|
+
analyticsInstance = Analytics.create();
|
|
681
|
+
}
|
|
682
|
+
const analytics = new Proxy({}, {
|
|
683
|
+
get(target, prop) {
|
|
684
|
+
if (!target.isEnabled) {
|
|
685
|
+
const instance = getAnalytics();
|
|
686
|
+
Object.setPrototypeOf(target, Object.getPrototypeOf(instance));
|
|
687
|
+
Object.assign(target, instance);
|
|
688
|
+
}
|
|
689
|
+
return target[prop];
|
|
690
|
+
}
|
|
691
|
+
});
|
|
717
692
|
function extractHostname(url) {
|
|
718
693
|
try {
|
|
719
694
|
const parsed = new URL(url);
|
|
@@ -734,7 +709,7 @@ function extractProtocol(urlOrPath) {
|
|
|
734
709
|
}
|
|
735
710
|
}
|
|
736
711
|
const name = "@arabold/docs-mcp-server";
|
|
737
|
-
const version = "1.
|
|
712
|
+
const version = "1.25.0";
|
|
738
713
|
const description = "MCP server for fetching and searching documentation";
|
|
739
714
|
const type = "module";
|
|
740
715
|
const bin = { "docs-mcp-server": "dist/index.js" };
|
|
@@ -7118,7 +7093,7 @@ class EmbeddingConfig {
|
|
|
7118
7093
|
}
|
|
7119
7094
|
}
|
|
7120
7095
|
/**
|
|
7121
|
-
* Parse embedding model configuration from
|
|
7096
|
+
* Parse embedding model configuration from a provided model specification.
|
|
7122
7097
|
* This is a synchronous operation that extracts provider, model, and known dimensions.
|
|
7123
7098
|
*
|
|
7124
7099
|
* Supports various providers:
|
|
@@ -7129,11 +7104,11 @@ class EmbeddingConfig {
|
|
|
7129
7104
|
* - microsoft: Azure OpenAI
|
|
7130
7105
|
* - sagemaker: AWS SageMaker hosted models
|
|
7131
7106
|
*
|
|
7132
|
-
* @param modelSpec
|
|
7107
|
+
* @param modelSpec Model specification (e.g., "openai:text-embedding-3-small"), defaults to "text-embedding-3-small"
|
|
7133
7108
|
* @returns Parsed embedding model configuration
|
|
7134
7109
|
*/
|
|
7135
7110
|
parse(modelSpec) {
|
|
7136
|
-
const spec = modelSpec ||
|
|
7111
|
+
const spec = modelSpec || "text-embedding-3-small";
|
|
7137
7112
|
const colonIndex = spec.indexOf(":");
|
|
7138
7113
|
let provider;
|
|
7139
7114
|
let model;
|
|
@@ -7331,16 +7306,13 @@ const CLI_DEFAULTS = {
|
|
|
7331
7306
|
TELEMETRY: true
|
|
7332
7307
|
};
|
|
7333
7308
|
function parseAuthConfig(options) {
|
|
7334
|
-
|
|
7335
|
-
if (!enabled) {
|
|
7309
|
+
if (!options.authEnabled) {
|
|
7336
7310
|
return void 0;
|
|
7337
7311
|
}
|
|
7338
|
-
const issuerUrl = options.authIssuerUrl ?? process.env.DOCS_MCP_AUTH_ISSUER_URL;
|
|
7339
|
-
const audience = options.authAudience ?? process.env.DOCS_MCP_AUTH_AUDIENCE;
|
|
7340
7312
|
return {
|
|
7341
|
-
enabled,
|
|
7342
|
-
issuerUrl,
|
|
7343
|
-
audience,
|
|
7313
|
+
enabled: true,
|
|
7314
|
+
issuerUrl: options.authIssuerUrl,
|
|
7315
|
+
audience: options.authAudience,
|
|
7344
7316
|
scopes: ["openid", "profile"]
|
|
7345
7317
|
// Default scopes for OAuth2/OIDC
|
|
7346
7318
|
};
|
|
@@ -7405,12 +7377,23 @@ function warnHttpUsage(authConfig, port) {
|
|
|
7405
7377
|
);
|
|
7406
7378
|
}
|
|
7407
7379
|
}
|
|
7408
|
-
function resolveEmbeddingContext(
|
|
7380
|
+
function resolveEmbeddingContext(embeddingModel) {
|
|
7409
7381
|
try {
|
|
7410
|
-
|
|
7411
|
-
|
|
7412
|
-
|
|
7413
|
-
|
|
7382
|
+
let modelSpec = embeddingModel;
|
|
7383
|
+
if (!modelSpec && process.env.OPENAI_API_KEY) {
|
|
7384
|
+
modelSpec = "text-embedding-3-small";
|
|
7385
|
+
logger.debug(
|
|
7386
|
+
"Using default OpenAI embedding model due to OPENAI_API_KEY presence."
|
|
7387
|
+
);
|
|
7388
|
+
}
|
|
7389
|
+
if (!modelSpec) {
|
|
7390
|
+
logger.debug(
|
|
7391
|
+
"No embedding model specified and OPENAI_API_KEY not found. Embeddings are disabled."
|
|
7392
|
+
);
|
|
7393
|
+
return null;
|
|
7394
|
+
}
|
|
7395
|
+
logger.debug(`Resolving embedding configuration for model: ${modelSpec}`);
|
|
7396
|
+
return EmbeddingConfig.parseEmbeddingConfig(modelSpec);
|
|
7414
7397
|
} catch (error) {
|
|
7415
7398
|
logger.debug(`Failed to resolve embedding configuration: ${error}`);
|
|
7416
7399
|
return null;
|
|
@@ -7431,55 +7414,42 @@ class CancelJobTool {
|
|
|
7431
7414
|
* @returns A promise that resolves with the outcome message.
|
|
7432
7415
|
*/
|
|
7433
7416
|
async execute(input) {
|
|
7434
|
-
|
|
7435
|
-
|
|
7436
|
-
|
|
7437
|
-
|
|
7438
|
-
const job = await this.pipeline.getJob(input.jobId);
|
|
7439
|
-
if (!job) {
|
|
7440
|
-
logger.warn(`❓ [CancelJobTool] Job not found: ${input.jobId}`);
|
|
7441
|
-
return {
|
|
7442
|
-
message: `Job with ID ${input.jobId} not found.`,
|
|
7443
|
-
success: false
|
|
7444
|
-
};
|
|
7445
|
-
}
|
|
7446
|
-
if (job.status === PipelineJobStatus.COMPLETED || // Use enum member
|
|
7447
|
-
job.status === PipelineJobStatus.FAILED || // Use enum member
|
|
7448
|
-
job.status === PipelineJobStatus.CANCELLED) {
|
|
7449
|
-
logger.debug(
|
|
7450
|
-
`Job ${input.jobId} is already in a final state: ${job.status}.`
|
|
7451
|
-
);
|
|
7452
|
-
return {
|
|
7453
|
-
message: `Job ${input.jobId} is already ${job.status}. No action taken.`,
|
|
7454
|
-
success: true
|
|
7455
|
-
// Considered success as no cancellation needed
|
|
7456
|
-
};
|
|
7457
|
-
}
|
|
7458
|
-
await this.pipeline.cancelJob(input.jobId);
|
|
7459
|
-
const updatedJob = await this.pipeline.getJob(input.jobId);
|
|
7460
|
-
const finalStatus = updatedJob?.status ?? "UNKNOWN (job disappeared?)";
|
|
7461
|
-
logger.debug(
|
|
7462
|
-
`Cancellation requested for job ${input.jobId}. Current status: ${finalStatus}`
|
|
7463
|
-
);
|
|
7464
|
-
return {
|
|
7465
|
-
message: `Cancellation requested for job ${input.jobId}. Current status: ${finalStatus}.`,
|
|
7466
|
-
success: true
|
|
7467
|
-
};
|
|
7468
|
-
} catch (error) {
|
|
7469
|
-
logger.error(`❌ Error cancelling job ${input.jobId}: ${error}`);
|
|
7470
|
-
return {
|
|
7471
|
-
message: `Failed to cancel job ${input.jobId}: ${error instanceof Error ? error.message : String(error)}`,
|
|
7472
|
-
success: false
|
|
7473
|
-
};
|
|
7474
|
-
}
|
|
7475
|
-
},
|
|
7476
|
-
(result) => {
|
|
7417
|
+
try {
|
|
7418
|
+
const job = await this.pipeline.getJob(input.jobId);
|
|
7419
|
+
if (!job) {
|
|
7420
|
+
logger.warn(`❓ [CancelJobTool] Job not found: ${input.jobId}`);
|
|
7477
7421
|
return {
|
|
7478
|
-
|
|
7479
|
-
|
|
7422
|
+
message: `Job with ID ${input.jobId} not found.`,
|
|
7423
|
+
success: false
|
|
7480
7424
|
};
|
|
7481
7425
|
}
|
|
7482
|
-
|
|
7426
|
+
if (job.status === PipelineJobStatus.COMPLETED || // Use enum member
|
|
7427
|
+
job.status === PipelineJobStatus.FAILED || // Use enum member
|
|
7428
|
+
job.status === PipelineJobStatus.CANCELLED) {
|
|
7429
|
+
logger.debug(`Job ${input.jobId} is already in a final state: ${job.status}.`);
|
|
7430
|
+
return {
|
|
7431
|
+
message: `Job ${input.jobId} is already ${job.status}. No action taken.`,
|
|
7432
|
+
success: true
|
|
7433
|
+
// Considered success as no cancellation needed
|
|
7434
|
+
};
|
|
7435
|
+
}
|
|
7436
|
+
await this.pipeline.cancelJob(input.jobId);
|
|
7437
|
+
const updatedJob = await this.pipeline.getJob(input.jobId);
|
|
7438
|
+
const finalStatus = updatedJob?.status ?? "UNKNOWN (job disappeared?)";
|
|
7439
|
+
logger.debug(
|
|
7440
|
+
`Cancellation requested for job ${input.jobId}. Current status: ${finalStatus}`
|
|
7441
|
+
);
|
|
7442
|
+
return {
|
|
7443
|
+
message: `Cancellation requested for job ${input.jobId}. Current status: ${finalStatus}.`,
|
|
7444
|
+
success: true
|
|
7445
|
+
};
|
|
7446
|
+
} catch (error) {
|
|
7447
|
+
logger.error(`❌ Error cancelling job ${input.jobId}: ${error}`);
|
|
7448
|
+
return {
|
|
7449
|
+
message: `Failed to cancel job ${input.jobId}: ${error instanceof Error ? error.message : String(error)}`,
|
|
7450
|
+
success: false
|
|
7451
|
+
};
|
|
7452
|
+
}
|
|
7483
7453
|
}
|
|
7484
7454
|
}
|
|
7485
7455
|
class ClearCompletedJobsTool {
|
|
@@ -7497,33 +7467,24 @@ class ClearCompletedJobsTool {
|
|
|
7497
7467
|
* @returns A promise that resolves with the outcome of the clear operation.
|
|
7498
7468
|
*/
|
|
7499
7469
|
async execute(_input) {
|
|
7500
|
-
|
|
7501
|
-
|
|
7502
|
-
|
|
7503
|
-
|
|
7504
|
-
|
|
7505
|
-
|
|
7506
|
-
|
|
7507
|
-
|
|
7508
|
-
|
|
7509
|
-
|
|
7510
|
-
|
|
7511
|
-
|
|
7512
|
-
|
|
7513
|
-
|
|
7514
|
-
|
|
7515
|
-
|
|
7516
|
-
|
|
7517
|
-
|
|
7518
|
-
clearedCount: 0
|
|
7519
|
-
};
|
|
7520
|
-
}
|
|
7521
|
-
},
|
|
7522
|
-
(result) => ({
|
|
7523
|
-
success: result.success,
|
|
7524
|
-
clearedCount: result.clearedCount
|
|
7525
|
-
})
|
|
7526
|
-
);
|
|
7470
|
+
try {
|
|
7471
|
+
const clearedCount = await this.pipeline.clearCompletedJobs();
|
|
7472
|
+
const message = clearedCount > 0 ? `Successfully cleared ${clearedCount} completed job${clearedCount === 1 ? "" : "s"} from the queue.` : "No completed jobs to clear.";
|
|
7473
|
+
logger.debug(message);
|
|
7474
|
+
return {
|
|
7475
|
+
message,
|
|
7476
|
+
success: true,
|
|
7477
|
+
clearedCount
|
|
7478
|
+
};
|
|
7479
|
+
} catch (error) {
|
|
7480
|
+
const errorMessage = `Failed to clear completed jobs: ${error instanceof Error ? error.message : String(error)}`;
|
|
7481
|
+
logger.error(`❌ ${errorMessage}`);
|
|
7482
|
+
return {
|
|
7483
|
+
message: errorMessage,
|
|
7484
|
+
success: false,
|
|
7485
|
+
clearedCount: 0
|
|
7486
|
+
};
|
|
7487
|
+
}
|
|
7527
7488
|
}
|
|
7528
7489
|
}
|
|
7529
7490
|
class ToolError extends Error {
|
|
@@ -7583,103 +7544,88 @@ class FetchUrlTool {
|
|
|
7583
7544
|
* @throws {ToolError} If fetching or processing fails
|
|
7584
7545
|
*/
|
|
7585
7546
|
async execute(options) {
|
|
7586
|
-
|
|
7587
|
-
|
|
7588
|
-
|
|
7589
|
-
|
|
7590
|
-
|
|
7591
|
-
|
|
7592
|
-
|
|
7593
|
-
|
|
7594
|
-
|
|
7595
|
-
|
|
7596
|
-
|
|
7597
|
-
|
|
7598
|
-
|
|
7599
|
-
|
|
7600
|
-
|
|
7601
|
-
|
|
7602
|
-
|
|
7603
|
-
|
|
7604
|
-
|
|
7605
|
-
|
|
7606
|
-
|
|
7607
|
-
|
|
7608
|
-
|
|
7609
|
-
|
|
7610
|
-
|
|
7611
|
-
|
|
7612
|
-
|
|
7613
|
-
|
|
7614
|
-
|
|
7615
|
-
|
|
7616
|
-
|
|
7617
|
-
|
|
7618
|
-
|
|
7619
|
-
|
|
7620
|
-
|
|
7621
|
-
|
|
7622
|
-
|
|
7623
|
-
|
|
7624
|
-
|
|
7625
|
-
|
|
7626
|
-
|
|
7627
|
-
// propagate custom headers
|
|
7628
|
-
},
|
|
7629
|
-
fetcher
|
|
7630
|
-
);
|
|
7631
|
-
break;
|
|
7632
|
-
}
|
|
7633
|
-
}
|
|
7634
|
-
if (!processed) {
|
|
7635
|
-
logger.warn(
|
|
7636
|
-
`⚠️ Unsupported content type "${rawContent.mimeType}" for ${url}. Returning raw content.`
|
|
7637
|
-
);
|
|
7638
|
-
const resolvedCharset = resolveCharset(
|
|
7639
|
-
rawContent.charset,
|
|
7640
|
-
rawContent.content,
|
|
7641
|
-
rawContent.mimeType
|
|
7642
|
-
);
|
|
7643
|
-
const contentString = convertToString(rawContent.content, resolvedCharset);
|
|
7644
|
-
return contentString;
|
|
7645
|
-
}
|
|
7646
|
-
for (const err of processed.errors) {
|
|
7647
|
-
logger.warn(`⚠️ Processing error for ${url}: ${err.message}`);
|
|
7648
|
-
}
|
|
7649
|
-
if (typeof processed.textContent !== "string" || !processed.textContent.trim()) {
|
|
7650
|
-
throw new ToolError(
|
|
7651
|
-
`Processing resulted in empty content for ${url}`,
|
|
7652
|
-
this.constructor.name
|
|
7653
|
-
);
|
|
7654
|
-
}
|
|
7655
|
-
logger.info(`✅ Successfully processed ${url}`);
|
|
7656
|
-
return processed.textContent;
|
|
7657
|
-
} catch (error) {
|
|
7658
|
-
if (error instanceof ScraperError || error instanceof ToolError) {
|
|
7659
|
-
throw new ToolError(
|
|
7660
|
-
`Failed to fetch or process URL: ${error.message}`,
|
|
7661
|
-
this.constructor.name
|
|
7662
|
-
);
|
|
7663
|
-
}
|
|
7664
|
-
throw new ToolError(
|
|
7665
|
-
`Failed to fetch or process URL: ${error instanceof Error ? error.message : String(error)}`,
|
|
7666
|
-
this.constructor.name
|
|
7547
|
+
const { url, scrapeMode = ScrapeMode.Auto, headers } = options;
|
|
7548
|
+
const canFetchResults = this.fetchers.map((f) => f.canFetch(url));
|
|
7549
|
+
const fetcherIndex = canFetchResults.indexOf(true);
|
|
7550
|
+
if (fetcherIndex === -1) {
|
|
7551
|
+
throw new ToolError(
|
|
7552
|
+
`Invalid URL: ${url}. Must be an HTTP/HTTPS URL or a file:// URL.`,
|
|
7553
|
+
this.constructor.name
|
|
7554
|
+
);
|
|
7555
|
+
}
|
|
7556
|
+
const fetcher = this.fetchers[fetcherIndex];
|
|
7557
|
+
logger.debug(`Using fetcher "${fetcher.constructor.name}" for URL: ${url}`);
|
|
7558
|
+
try {
|
|
7559
|
+
logger.info(`📡 Fetching ${url}...`);
|
|
7560
|
+
const rawContent = await fetcher.fetch(url, {
|
|
7561
|
+
followRedirects: options.followRedirects ?? true,
|
|
7562
|
+
maxRetries: 3,
|
|
7563
|
+
headers
|
|
7564
|
+
// propagate custom headers
|
|
7565
|
+
});
|
|
7566
|
+
logger.info("🔄 Processing content...");
|
|
7567
|
+
let processed;
|
|
7568
|
+
for (const pipeline of this.pipelines) {
|
|
7569
|
+
if (pipeline.canProcess(rawContent)) {
|
|
7570
|
+
processed = await pipeline.process(
|
|
7571
|
+
rawContent,
|
|
7572
|
+
{
|
|
7573
|
+
url,
|
|
7574
|
+
library: "",
|
|
7575
|
+
version: "",
|
|
7576
|
+
maxDepth: 0,
|
|
7577
|
+
maxPages: 1,
|
|
7578
|
+
maxConcurrency: 1,
|
|
7579
|
+
scope: "subpages",
|
|
7580
|
+
followRedirects: options.followRedirects ?? true,
|
|
7581
|
+
excludeSelectors: void 0,
|
|
7582
|
+
ignoreErrors: false,
|
|
7583
|
+
scrapeMode,
|
|
7584
|
+
headers
|
|
7585
|
+
// propagate custom headers
|
|
7586
|
+
},
|
|
7587
|
+
fetcher
|
|
7667
7588
|
);
|
|
7668
|
-
|
|
7669
|
-
await Promise.allSettled(this.pipelines.map((pipeline) => pipeline.close()));
|
|
7589
|
+
break;
|
|
7670
7590
|
}
|
|
7671
|
-
},
|
|
7672
|
-
(result) => {
|
|
7673
|
-
const { url, scrapeMode, followRedirects, headers } = options;
|
|
7674
|
-
return {
|
|
7675
|
-
url,
|
|
7676
|
-
scrapeMode,
|
|
7677
|
-
followRedirects,
|
|
7678
|
-
contentLength: result.length,
|
|
7679
|
-
hasHeaders: !!headers
|
|
7680
|
-
};
|
|
7681
7591
|
}
|
|
7682
|
-
|
|
7592
|
+
if (!processed) {
|
|
7593
|
+
logger.warn(
|
|
7594
|
+
`⚠️ Unsupported content type "${rawContent.mimeType}" for ${url}. Returning raw content.`
|
|
7595
|
+
);
|
|
7596
|
+
const resolvedCharset = resolveCharset(
|
|
7597
|
+
rawContent.charset,
|
|
7598
|
+
rawContent.content,
|
|
7599
|
+
rawContent.mimeType
|
|
7600
|
+
);
|
|
7601
|
+
const contentString = convertToString(rawContent.content, resolvedCharset);
|
|
7602
|
+
return contentString;
|
|
7603
|
+
}
|
|
7604
|
+
for (const err of processed.errors) {
|
|
7605
|
+
logger.warn(`⚠️ Processing error for ${url}: ${err.message}`);
|
|
7606
|
+
}
|
|
7607
|
+
if (typeof processed.textContent !== "string" || !processed.textContent.trim()) {
|
|
7608
|
+
throw new ToolError(
|
|
7609
|
+
`Processing resulted in empty content for ${url}`,
|
|
7610
|
+
this.constructor.name
|
|
7611
|
+
);
|
|
7612
|
+
}
|
|
7613
|
+
logger.info(`✅ Successfully processed ${url}`);
|
|
7614
|
+
return processed.textContent;
|
|
7615
|
+
} catch (error) {
|
|
7616
|
+
if (error instanceof ScraperError || error instanceof ToolError) {
|
|
7617
|
+
throw new ToolError(
|
|
7618
|
+
`Failed to fetch or process URL: ${error.message}`,
|
|
7619
|
+
this.constructor.name
|
|
7620
|
+
);
|
|
7621
|
+
}
|
|
7622
|
+
throw new ToolError(
|
|
7623
|
+
`Failed to fetch or process URL: ${error instanceof Error ? error.message : String(error)}`,
|
|
7624
|
+
this.constructor.name
|
|
7625
|
+
);
|
|
7626
|
+
} finally {
|
|
7627
|
+
await Promise.allSettled(this.pipelines.map((pipeline) => pipeline.close()));
|
|
7628
|
+
}
|
|
7683
7629
|
}
|
|
7684
7630
|
}
|
|
7685
7631
|
class FindVersionTool {
|
|
@@ -7692,50 +7638,36 @@ class FindVersionTool {
|
|
|
7692
7638
|
* @returns A descriptive string indicating the best match and unversioned status, or an error message.
|
|
7693
7639
|
*/
|
|
7694
7640
|
async execute(options) {
|
|
7695
|
-
|
|
7696
|
-
|
|
7697
|
-
|
|
7698
|
-
|
|
7699
|
-
|
|
7700
|
-
|
|
7701
|
-
|
|
7702
|
-
|
|
7703
|
-
|
|
7704
|
-
|
|
7705
|
-
|
|
7706
|
-
|
|
7707
|
-
|
|
7708
|
-
|
|
7709
|
-
|
|
7710
|
-
|
|
7711
|
-
|
|
7712
|
-
|
|
7713
|
-
|
|
7714
|
-
|
|
7715
|
-
|
|
7716
|
-
|
|
7717
|
-
}
|
|
7718
|
-
|
|
7719
|
-
logger.info(`ℹ️ Version not found: ${error.message}`);
|
|
7720
|
-
const message = `No matching version or unversioned documents found for ${libraryAndVersion}. Available: ${error.availableVersions.length > 0 ? error.availableVersions.map((v) => v.version).join(", ") : "None"}.`;
|
|
7721
|
-
return { message, bestMatch: null, hasUnversioned: false };
|
|
7722
|
-
}
|
|
7723
|
-
logger.error(
|
|
7724
|
-
`❌ Error finding version for ${libraryAndVersion}: ${error instanceof Error ? error.message : error}`
|
|
7725
|
-
);
|
|
7726
|
-
throw error;
|
|
7727
|
-
}
|
|
7728
|
-
},
|
|
7729
|
-
(result) => {
|
|
7730
|
-
const { library, targetVersion } = options;
|
|
7731
|
-
return {
|
|
7732
|
-
library,
|
|
7733
|
-
targetVersion,
|
|
7734
|
-
foundMatch: !!result.bestMatch,
|
|
7735
|
-
hasUnversioned: result.hasUnversioned
|
|
7736
|
-
};
|
|
7641
|
+
const { library, targetVersion } = options;
|
|
7642
|
+
const libraryAndVersion = `${library}${targetVersion ? `@${targetVersion}` : ""}`;
|
|
7643
|
+
try {
|
|
7644
|
+
const { bestMatch, hasUnversioned } = await this.docService.findBestVersion(
|
|
7645
|
+
library,
|
|
7646
|
+
targetVersion
|
|
7647
|
+
);
|
|
7648
|
+
let message = "";
|
|
7649
|
+
if (bestMatch) {
|
|
7650
|
+
message = `Best match: ${bestMatch}.`;
|
|
7651
|
+
if (hasUnversioned) {
|
|
7652
|
+
message += " Unversioned docs also available.";
|
|
7653
|
+
}
|
|
7654
|
+
} else if (hasUnversioned) {
|
|
7655
|
+
message = `No matching version found for ${libraryAndVersion}, but unversioned docs exist.`;
|
|
7656
|
+
} else {
|
|
7657
|
+
message = `No matching version or unversioned documents found for ${libraryAndVersion}.`;
|
|
7658
|
+
}
|
|
7659
|
+
return message;
|
|
7660
|
+
} catch (error) {
|
|
7661
|
+
if (error instanceof VersionNotFoundError) {
|
|
7662
|
+
logger.info(`ℹ️ Version not found: ${error.message}`);
|
|
7663
|
+
const message = `No matching version or unversioned documents found for ${libraryAndVersion}. Available: ${error.availableVersions.length > 0 ? error.availableVersions.map((v) => v.version).join(", ") : "None"}.`;
|
|
7664
|
+
return message;
|
|
7737
7665
|
}
|
|
7738
|
-
|
|
7666
|
+
logger.error(
|
|
7667
|
+
`❌ Error finding version for ${libraryAndVersion}: ${error instanceof Error ? error.message : error}`
|
|
7668
|
+
);
|
|
7669
|
+
throw error;
|
|
7670
|
+
}
|
|
7739
7671
|
}
|
|
7740
7672
|
}
|
|
7741
7673
|
class GetJobInfoTool {
|
|
@@ -7753,41 +7685,29 @@ class GetJobInfoTool {
|
|
|
7753
7685
|
* @returns A promise that resolves with the simplified job info or null if not found.
|
|
7754
7686
|
*/
|
|
7755
7687
|
async execute(input) {
|
|
7756
|
-
|
|
7757
|
-
|
|
7758
|
-
|
|
7759
|
-
|
|
7760
|
-
|
|
7761
|
-
|
|
7762
|
-
|
|
7763
|
-
|
|
7764
|
-
|
|
7765
|
-
|
|
7766
|
-
|
|
7767
|
-
|
|
7768
|
-
|
|
7769
|
-
|
|
7770
|
-
|
|
7771
|
-
|
|
7772
|
-
|
|
7773
|
-
|
|
7774
|
-
|
|
7775
|
-
|
|
7776
|
-
|
|
7777
|
-
|
|
7778
|
-
|
|
7779
|
-
errorMessage: job.errorMessage ?? void 0
|
|
7780
|
-
};
|
|
7781
|
-
return { job: jobInfo };
|
|
7782
|
-
},
|
|
7783
|
-
(result) => {
|
|
7784
|
-
return {
|
|
7785
|
-
found: result.job !== null,
|
|
7786
|
-
library: result.job?.library,
|
|
7787
|
-
version: result.job?.version
|
|
7788
|
-
};
|
|
7789
|
-
}
|
|
7790
|
-
);
|
|
7688
|
+
const job = await this.pipeline.getJob(input.jobId);
|
|
7689
|
+
if (!job) {
|
|
7690
|
+
return { job: null };
|
|
7691
|
+
}
|
|
7692
|
+
const jobInfo = {
|
|
7693
|
+
id: job.id,
|
|
7694
|
+
library: job.library,
|
|
7695
|
+
version: job.version,
|
|
7696
|
+
status: job.status,
|
|
7697
|
+
dbStatus: job.versionStatus,
|
|
7698
|
+
createdAt: job.createdAt.toISOString(),
|
|
7699
|
+
startedAt: job.startedAt?.toISOString() ?? null,
|
|
7700
|
+
finishedAt: job.finishedAt?.toISOString() ?? null,
|
|
7701
|
+
error: job.error?.message ?? null,
|
|
7702
|
+
progress: job.progressMaxPages && job.progressMaxPages > 0 ? {
|
|
7703
|
+
pages: job.progressPages || 0,
|
|
7704
|
+
totalPages: job.progressMaxPages,
|
|
7705
|
+
totalDiscovered: job.progress?.totalDiscovered || job.progressMaxPages
|
|
7706
|
+
} : void 0,
|
|
7707
|
+
updatedAt: job.updatedAt?.toISOString(),
|
|
7708
|
+
errorMessage: job.errorMessage ?? void 0
|
|
7709
|
+
};
|
|
7710
|
+
return { job: jobInfo };
|
|
7791
7711
|
}
|
|
7792
7712
|
}
|
|
7793
7713
|
class ListJobsTool {
|
|
@@ -7805,45 +7725,28 @@ class ListJobsTool {
|
|
|
7805
7725
|
* @returns A promise that resolves with the list of simplified job objects.
|
|
7806
7726
|
*/
|
|
7807
7727
|
async execute(input) {
|
|
7808
|
-
|
|
7809
|
-
|
|
7810
|
-
|
|
7811
|
-
|
|
7812
|
-
|
|
7813
|
-
|
|
7814
|
-
|
|
7815
|
-
|
|
7816
|
-
|
|
7817
|
-
|
|
7818
|
-
|
|
7819
|
-
|
|
7820
|
-
|
|
7821
|
-
|
|
7822
|
-
|
|
7823
|
-
|
|
7824
|
-
|
|
7825
|
-
|
|
7826
|
-
|
|
7827
|
-
|
|
7828
|
-
|
|
7829
|
-
|
|
7830
|
-
};
|
|
7831
|
-
});
|
|
7832
|
-
return { jobs: simplifiedJobs };
|
|
7833
|
-
},
|
|
7834
|
-
(result) => {
|
|
7835
|
-
return {
|
|
7836
|
-
jobCount: result.jobs.length,
|
|
7837
|
-
statusCounts: result.jobs.reduce(
|
|
7838
|
-
(acc, job) => {
|
|
7839
|
-
acc[job.status] = (acc[job.status] || 0) + 1;
|
|
7840
|
-
return acc;
|
|
7841
|
-
},
|
|
7842
|
-
{}
|
|
7843
|
-
)
|
|
7844
|
-
};
|
|
7845
|
-
}
|
|
7846
|
-
);
|
|
7728
|
+
const jobs = await this.pipeline.getJobs(input.status);
|
|
7729
|
+
const simplifiedJobs = jobs.map((job) => {
|
|
7730
|
+
return {
|
|
7731
|
+
id: job.id,
|
|
7732
|
+
library: job.library,
|
|
7733
|
+
version: job.version,
|
|
7734
|
+
status: job.status,
|
|
7735
|
+
dbStatus: job.versionStatus,
|
|
7736
|
+
createdAt: job.createdAt.toISOString(),
|
|
7737
|
+
startedAt: job.startedAt?.toISOString() ?? null,
|
|
7738
|
+
finishedAt: job.finishedAt?.toISOString() ?? null,
|
|
7739
|
+
error: job.error?.message ?? null,
|
|
7740
|
+
progress: job.progressMaxPages && job.progressMaxPages > 0 ? {
|
|
7741
|
+
pages: job.progressPages || 0,
|
|
7742
|
+
totalPages: job.progressMaxPages,
|
|
7743
|
+
totalDiscovered: job.progress?.totalDiscovered || job.progressMaxPages
|
|
7744
|
+
} : void 0,
|
|
7745
|
+
updatedAt: job.updatedAt?.toISOString(),
|
|
7746
|
+
errorMessage: job.errorMessage ?? void 0
|
|
7747
|
+
};
|
|
7748
|
+
});
|
|
7749
|
+
return { jobs: simplifiedJobs };
|
|
7847
7750
|
}
|
|
7848
7751
|
}
|
|
7849
7752
|
class ListLibrariesTool {
|
|
@@ -7852,32 +7755,20 @@ class ListLibrariesTool {
|
|
|
7852
7755
|
this.docService = docService;
|
|
7853
7756
|
}
|
|
7854
7757
|
async execute(_options) {
|
|
7855
|
-
|
|
7856
|
-
|
|
7857
|
-
|
|
7858
|
-
|
|
7859
|
-
|
|
7860
|
-
|
|
7861
|
-
|
|
7862
|
-
|
|
7863
|
-
|
|
7864
|
-
|
|
7865
|
-
|
|
7866
|
-
|
|
7867
|
-
|
|
7868
|
-
|
|
7869
|
-
}))
|
|
7870
|
-
}));
|
|
7871
|
-
return { libraries };
|
|
7872
|
-
},
|
|
7873
|
-
(result) => ({
|
|
7874
|
-
libraryCount: result.libraries.length,
|
|
7875
|
-
totalVersions: result.libraries.reduce(
|
|
7876
|
-
(sum, lib) => sum + lib.versions.length,
|
|
7877
|
-
0
|
|
7878
|
-
)
|
|
7879
|
-
})
|
|
7880
|
-
);
|
|
7758
|
+
const rawLibraries = await this.docService.listLibraries();
|
|
7759
|
+
const libraries = rawLibraries.map(({ library, versions }) => ({
|
|
7760
|
+
name: library,
|
|
7761
|
+
versions: versions.map((v) => ({
|
|
7762
|
+
version: v.ref.version,
|
|
7763
|
+
documentCount: v.counts.documents,
|
|
7764
|
+
uniqueUrlCount: v.counts.uniqueUrls,
|
|
7765
|
+
indexedAt: v.indexedAt,
|
|
7766
|
+
status: v.status,
|
|
7767
|
+
...v.progress ? { progress: v.progress } : void 0,
|
|
7768
|
+
sourceUrl: v.sourceUrl
|
|
7769
|
+
}))
|
|
7770
|
+
}));
|
|
7771
|
+
return { libraries };
|
|
7881
7772
|
}
|
|
7882
7773
|
}
|
|
7883
7774
|
class RemoveTool {
|
|
@@ -7891,42 +7782,29 @@ class RemoveTool {
|
|
|
7891
7782
|
* Removes all documents, the version record, and the library if no other versions exist.
|
|
7892
7783
|
*/
|
|
7893
7784
|
async execute(args) {
|
|
7894
|
-
|
|
7895
|
-
|
|
7896
|
-
|
|
7897
|
-
|
|
7898
|
-
|
|
7899
|
-
|
|
7900
|
-
|
|
7901
|
-
|
|
7902
|
-
|
|
7903
|
-
|
|
7904
|
-
|
|
7905
|
-
|
|
7906
|
-
|
|
7907
|
-
);
|
|
7908
|
-
await this.pipeline.cancelJob(job.id);
|
|
7909
|
-
await this.pipeline.waitForJobCompletion(job.id);
|
|
7910
|
-
}
|
|
7911
|
-
await this.documentManagementService.removeVersion(library, version2);
|
|
7912
|
-
const message = `Successfully removed ${library}${version2 ? `@${version2}` : ""}.`;
|
|
7913
|
-
logger.info(`✅ ${message}`);
|
|
7914
|
-
return { message };
|
|
7915
|
-
} catch (error) {
|
|
7916
|
-
const errorMessage = `Failed to remove ${library}${version2 ? `@${version2}` : ""}: ${error instanceof Error ? error.message : String(error)}`;
|
|
7917
|
-
logger.error(`❌ Error removing library: ${errorMessage}`);
|
|
7918
|
-
throw new ToolError(errorMessage, this.constructor.name);
|
|
7919
|
-
}
|
|
7920
|
-
},
|
|
7921
|
-
() => {
|
|
7922
|
-
const { library, version: version2 } = args;
|
|
7923
|
-
return {
|
|
7924
|
-
library,
|
|
7925
|
-
version: version2
|
|
7926
|
-
// Success is implicit since if this callback runs, no exception was thrown
|
|
7927
|
-
};
|
|
7785
|
+
const { library, version: version2 } = args;
|
|
7786
|
+
logger.info(`🗑️ Removing library: ${library}${version2 ? `@${version2}` : ""}`);
|
|
7787
|
+
try {
|
|
7788
|
+
const allJobs = await this.pipeline.getJobs();
|
|
7789
|
+
const jobs = allJobs.filter(
|
|
7790
|
+
(job) => job.library === library && job.version === (version2 ?? "") && (job.status === PipelineJobStatus.QUEUED || job.status === PipelineJobStatus.RUNNING)
|
|
7791
|
+
);
|
|
7792
|
+
for (const job of jobs) {
|
|
7793
|
+
logger.info(
|
|
7794
|
+
`🚫 Aborting job for ${library}@${version2 ?? ""} before deletion: ${job.id}`
|
|
7795
|
+
);
|
|
7796
|
+
await this.pipeline.cancelJob(job.id);
|
|
7797
|
+
await this.pipeline.waitForJobCompletion(job.id);
|
|
7928
7798
|
}
|
|
7929
|
-
|
|
7799
|
+
await this.documentManagementService.removeVersion(library, version2);
|
|
7800
|
+
const message = `Successfully removed ${library}${version2 ? `@${version2}` : ""}.`;
|
|
7801
|
+
logger.info(`✅ ${message}`);
|
|
7802
|
+
return { message };
|
|
7803
|
+
} catch (error) {
|
|
7804
|
+
const errorMessage = `Failed to remove ${library}${version2 ? `@${version2}` : ""}: ${error instanceof Error ? error.message : String(error)}`;
|
|
7805
|
+
logger.error(`❌ Error removing library: ${errorMessage}`);
|
|
7806
|
+
throw new ToolError(errorMessage, this.constructor.name);
|
|
7807
|
+
}
|
|
7930
7808
|
}
|
|
7931
7809
|
}
|
|
7932
7810
|
class ScrapeTool {
|
|
@@ -7942,80 +7820,66 @@ class ScrapeTool {
|
|
|
7942
7820
|
options: scraperOptions,
|
|
7943
7821
|
waitForCompletion = true
|
|
7944
7822
|
} = options;
|
|
7945
|
-
|
|
7946
|
-
|
|
7947
|
-
|
|
7948
|
-
|
|
7949
|
-
|
|
7950
|
-
|
|
7951
|
-
|
|
7823
|
+
let internalVersion;
|
|
7824
|
+
const partialVersionRegex = /^\d+(\.\d+)?$/;
|
|
7825
|
+
if (version2 === null || version2 === void 0) {
|
|
7826
|
+
internalVersion = "";
|
|
7827
|
+
} else {
|
|
7828
|
+
const validFullVersion = semver.valid(version2);
|
|
7829
|
+
if (validFullVersion) {
|
|
7830
|
+
internalVersion = validFullVersion;
|
|
7831
|
+
} else if (partialVersionRegex.test(version2)) {
|
|
7832
|
+
const coercedVersion = semver.coerce(version2);
|
|
7833
|
+
if (coercedVersion) {
|
|
7834
|
+
internalVersion = coercedVersion.version;
|
|
7952
7835
|
} else {
|
|
7953
|
-
|
|
7954
|
-
|
|
7955
|
-
|
|
7956
|
-
} else if (partialVersionRegex.test(version2)) {
|
|
7957
|
-
const coercedVersion = semver.coerce(version2);
|
|
7958
|
-
if (coercedVersion) {
|
|
7959
|
-
internalVersion = coercedVersion.version;
|
|
7960
|
-
} else {
|
|
7961
|
-
throw new Error(
|
|
7962
|
-
`Invalid version format for scraping: '${version2}'. Use 'X.Y.Z', 'X.Y.Z-prerelease', 'X.Y', 'X', or omit.`
|
|
7963
|
-
);
|
|
7964
|
-
}
|
|
7965
|
-
} else {
|
|
7966
|
-
throw new Error(
|
|
7967
|
-
`Invalid version format for scraping: '${version2}'. Use 'X.Y.Z', 'X.Y.Z-prerelease', 'X.Y', 'X', or omit.`
|
|
7968
|
-
);
|
|
7969
|
-
}
|
|
7970
|
-
}
|
|
7971
|
-
internalVersion = internalVersion.toLowerCase();
|
|
7972
|
-
const pipeline = this.pipeline;
|
|
7973
|
-
const enqueueVersion = internalVersion === "" ? null : internalVersion;
|
|
7974
|
-
const jobId = await pipeline.enqueueJob(library, enqueueVersion, {
|
|
7975
|
-
url,
|
|
7976
|
-
library,
|
|
7977
|
-
version: internalVersion,
|
|
7978
|
-
scope: scraperOptions?.scope ?? "subpages",
|
|
7979
|
-
followRedirects: scraperOptions?.followRedirects ?? true,
|
|
7980
|
-
maxPages: scraperOptions?.maxPages ?? DEFAULT_MAX_PAGES,
|
|
7981
|
-
maxDepth: scraperOptions?.maxDepth ?? DEFAULT_MAX_DEPTH$1,
|
|
7982
|
-
maxConcurrency: scraperOptions?.maxConcurrency ?? DEFAULT_MAX_CONCURRENCY,
|
|
7983
|
-
ignoreErrors: scraperOptions?.ignoreErrors ?? true,
|
|
7984
|
-
scrapeMode: scraperOptions?.scrapeMode ?? ScrapeMode.Auto,
|
|
7985
|
-
// Pass scrapeMode enum
|
|
7986
|
-
includePatterns: scraperOptions?.includePatterns,
|
|
7987
|
-
excludePatterns: scraperOptions?.excludePatterns,
|
|
7988
|
-
headers: scraperOptions?.headers
|
|
7989
|
-
// <-- propagate headers
|
|
7990
|
-
});
|
|
7991
|
-
if (waitForCompletion) {
|
|
7992
|
-
try {
|
|
7993
|
-
await pipeline.waitForJobCompletion(jobId);
|
|
7994
|
-
const finalJob = await pipeline.getJob(jobId);
|
|
7995
|
-
const finalPagesScraped = finalJob?.progress?.pagesScraped ?? 0;
|
|
7996
|
-
logger.debug(
|
|
7997
|
-
`Job ${jobId} finished with status ${finalJob?.status}. Pages scraped: ${finalPagesScraped}`
|
|
7998
|
-
);
|
|
7999
|
-
return {
|
|
8000
|
-
pagesScraped: finalPagesScraped
|
|
8001
|
-
};
|
|
8002
|
-
} catch (error) {
|
|
8003
|
-
logger.error(`❌ Job ${jobId} failed or was cancelled: ${error}`);
|
|
8004
|
-
throw error;
|
|
8005
|
-
}
|
|
7836
|
+
throw new Error(
|
|
7837
|
+
`Invalid version format for scraping: '${version2}'. Use 'X.Y.Z', 'X.Y.Z-prerelease', 'X.Y', 'X', or omit.`
|
|
7838
|
+
);
|
|
8006
7839
|
}
|
|
8007
|
-
|
|
8008
|
-
|
|
8009
|
-
|
|
8010
|
-
|
|
8011
|
-
|
|
8012
|
-
|
|
8013
|
-
|
|
8014
|
-
|
|
8015
|
-
|
|
8016
|
-
|
|
8017
|
-
|
|
8018
|
-
|
|
7840
|
+
} else {
|
|
7841
|
+
throw new Error(
|
|
7842
|
+
`Invalid version format for scraping: '${version2}'. Use 'X.Y.Z', 'X.Y.Z-prerelease', 'X.Y', 'X', or omit.`
|
|
7843
|
+
);
|
|
7844
|
+
}
|
|
7845
|
+
}
|
|
7846
|
+
internalVersion = internalVersion.toLowerCase();
|
|
7847
|
+
const pipeline = this.pipeline;
|
|
7848
|
+
const enqueueVersion = internalVersion === "" ? null : internalVersion;
|
|
7849
|
+
const jobId = await pipeline.enqueueJob(library, enqueueVersion, {
|
|
7850
|
+
url,
|
|
7851
|
+
library,
|
|
7852
|
+
version: internalVersion,
|
|
7853
|
+
scope: scraperOptions?.scope ?? "subpages",
|
|
7854
|
+
followRedirects: scraperOptions?.followRedirects ?? true,
|
|
7855
|
+
maxPages: scraperOptions?.maxPages ?? DEFAULT_MAX_PAGES,
|
|
7856
|
+
maxDepth: scraperOptions?.maxDepth ?? DEFAULT_MAX_DEPTH$1,
|
|
7857
|
+
maxConcurrency: scraperOptions?.maxConcurrency ?? DEFAULT_MAX_CONCURRENCY,
|
|
7858
|
+
ignoreErrors: scraperOptions?.ignoreErrors ?? true,
|
|
7859
|
+
scrapeMode: scraperOptions?.scrapeMode ?? ScrapeMode.Auto,
|
|
7860
|
+
// Pass scrapeMode enum
|
|
7861
|
+
includePatterns: scraperOptions?.includePatterns,
|
|
7862
|
+
excludePatterns: scraperOptions?.excludePatterns,
|
|
7863
|
+
headers: scraperOptions?.headers
|
|
7864
|
+
// <-- propagate headers
|
|
7865
|
+
});
|
|
7866
|
+
if (waitForCompletion) {
|
|
7867
|
+
try {
|
|
7868
|
+
await pipeline.waitForJobCompletion(jobId);
|
|
7869
|
+
const finalJob = await pipeline.getJob(jobId);
|
|
7870
|
+
const finalPagesScraped = finalJob?.progress?.pagesScraped ?? 0;
|
|
7871
|
+
logger.debug(
|
|
7872
|
+
`Job ${jobId} finished with status ${finalJob?.status}. Pages scraped: ${finalPagesScraped}`
|
|
7873
|
+
);
|
|
7874
|
+
return {
|
|
7875
|
+
pagesScraped: finalPagesScraped
|
|
7876
|
+
};
|
|
7877
|
+
} catch (error) {
|
|
7878
|
+
logger.error(`❌ Job ${jobId} failed or was cancelled: ${error}`);
|
|
7879
|
+
throw error;
|
|
7880
|
+
}
|
|
7881
|
+
}
|
|
7882
|
+
return { jobId };
|
|
8019
7883
|
}
|
|
8020
7884
|
}
|
|
8021
7885
|
class SearchTool {
|
|
@@ -8025,56 +7889,43 @@ class SearchTool {
|
|
|
8025
7889
|
}
|
|
8026
7890
|
async execute(options) {
|
|
8027
7891
|
const { library, version: version2, query, limit = 5, exactMatch = false } = options;
|
|
8028
|
-
|
|
8029
|
-
|
|
8030
|
-
|
|
8031
|
-
|
|
8032
|
-
|
|
8033
|
-
|
|
8034
|
-
|
|
8035
|
-
|
|
8036
|
-
|
|
8037
|
-
|
|
8038
|
-
|
|
8039
|
-
|
|
8040
|
-
|
|
8041
|
-
|
|
8042
|
-
|
|
8043
|
-
|
|
8044
|
-
|
|
8045
|
-
|
|
8046
|
-
|
|
8047
|
-
|
|
8048
|
-
|
|
8049
|
-
|
|
8050
|
-
|
|
8051
|
-
|
|
8052
|
-
versionToSearch = versionResult.bestMatch;
|
|
8053
|
-
}
|
|
8054
|
-
const results = await this.docService.searchStore(
|
|
8055
|
-
library,
|
|
8056
|
-
versionToSearch,
|
|
8057
|
-
query,
|
|
8058
|
-
limit
|
|
8059
|
-
);
|
|
8060
|
-
logger.info(`✅ Found ${results.length} matching results`);
|
|
8061
|
-
return { results };
|
|
8062
|
-
} catch (error) {
|
|
8063
|
-
logger.error(
|
|
8064
|
-
`❌ Search failed: ${error instanceof Error ? error.message : "Unknown error"}`
|
|
8065
|
-
);
|
|
8066
|
-
throw error;
|
|
8067
|
-
}
|
|
8068
|
-
},
|
|
8069
|
-
(result) => ({
|
|
7892
|
+
if (exactMatch && (!version2 || version2 === "latest")) {
|
|
7893
|
+
await this.docService.validateLibraryExists(library);
|
|
7894
|
+
const allLibraries = await this.docService.listLibraries();
|
|
7895
|
+
const libraryInfo = allLibraries.find((lib) => lib.library === library);
|
|
7896
|
+
const detailedVersions = libraryInfo ? libraryInfo.versions.map((v) => ({
|
|
7897
|
+
version: v.ref.version,
|
|
7898
|
+
documentCount: v.counts.documents,
|
|
7899
|
+
uniqueUrlCount: v.counts.uniqueUrls,
|
|
7900
|
+
indexedAt: v.indexedAt
|
|
7901
|
+
})) : [];
|
|
7902
|
+
throw new VersionNotFoundError(library, version2 ?? "latest", detailedVersions);
|
|
7903
|
+
}
|
|
7904
|
+
const resolvedVersion = version2 || "latest";
|
|
7905
|
+
logger.info(
|
|
7906
|
+
`🔍 Searching ${library}@${resolvedVersion} for: ${query}${exactMatch ? " (exact match)" : ""}`
|
|
7907
|
+
);
|
|
7908
|
+
try {
|
|
7909
|
+
await this.docService.validateLibraryExists(library);
|
|
7910
|
+
let versionToSearch = resolvedVersion;
|
|
7911
|
+
if (!exactMatch) {
|
|
7912
|
+
const versionResult = await this.docService.findBestVersion(library, version2);
|
|
7913
|
+
versionToSearch = versionResult.bestMatch;
|
|
7914
|
+
}
|
|
7915
|
+
const results = await this.docService.searchStore(
|
|
8070
7916
|
library,
|
|
8071
|
-
|
|
7917
|
+
versionToSearch,
|
|
8072
7918
|
query,
|
|
8073
|
-
limit
|
|
8074
|
-
|
|
8075
|
-
|
|
8076
|
-
}
|
|
8077
|
-
)
|
|
7919
|
+
limit
|
|
7920
|
+
);
|
|
7921
|
+
logger.info(`✅ Found ${results.length} matching results`);
|
|
7922
|
+
return { results };
|
|
7923
|
+
} catch (error) {
|
|
7924
|
+
logger.error(
|
|
7925
|
+
`❌ Search failed: ${error instanceof Error ? error.message : "Unknown error"}`
|
|
7926
|
+
);
|
|
7927
|
+
throw error;
|
|
7928
|
+
}
|
|
8078
7929
|
}
|
|
8079
7930
|
}
|
|
8080
7931
|
function createResponse(text) {
|
|
@@ -8133,6 +7984,17 @@ function createMcpServerInstance(tools, readOnly = false) {
|
|
|
8133
7984
|
// requires internet access
|
|
8134
7985
|
},
|
|
8135
7986
|
async ({ url, library, version: version2, maxPages, maxDepth, scope, followRedirects }) => {
|
|
7987
|
+
analytics.track(TelemetryEvent.TOOL_USED, {
|
|
7988
|
+
tool: "scrape_docs",
|
|
7989
|
+
context: "mcp_server",
|
|
7990
|
+
library,
|
|
7991
|
+
version: version2,
|
|
7992
|
+
url: new URL(url).hostname,
|
|
7993
|
+
// Privacy-safe URL tracking
|
|
7994
|
+
maxPages,
|
|
7995
|
+
maxDepth,
|
|
7996
|
+
scope
|
|
7997
|
+
});
|
|
8136
7998
|
try {
|
|
8137
7999
|
const result = await tools.scrape.execute({
|
|
8138
8000
|
url,
|
|
@@ -8177,6 +8039,15 @@ function createMcpServerInstance(tools, readOnly = false) {
|
|
|
8177
8039
|
destructiveHint: false
|
|
8178
8040
|
},
|
|
8179
8041
|
async ({ library, version: version2, query, limit }) => {
|
|
8042
|
+
analytics.track(TelemetryEvent.TOOL_USED, {
|
|
8043
|
+
tool: "search_docs",
|
|
8044
|
+
context: "mcp_server",
|
|
8045
|
+
library,
|
|
8046
|
+
version: version2,
|
|
8047
|
+
query: query.substring(0, 100),
|
|
8048
|
+
// Truncate query for privacy
|
|
8049
|
+
limit
|
|
8050
|
+
});
|
|
8180
8051
|
try {
|
|
8181
8052
|
const result = await tools.search.execute({
|
|
8182
8053
|
library,
|
|
@@ -8236,6 +8107,10 @@ ${r.content}
|
|
|
8236
8107
|
destructiveHint: false
|
|
8237
8108
|
},
|
|
8238
8109
|
async () => {
|
|
8110
|
+
analytics.track(TelemetryEvent.TOOL_USED, {
|
|
8111
|
+
tool: "list_libraries",
|
|
8112
|
+
context: "mcp_server"
|
|
8113
|
+
});
|
|
8239
8114
|
try {
|
|
8240
8115
|
const result = await tools.listLibraries.execute();
|
|
8241
8116
|
if (result.libraries.length === 0) {
|
|
@@ -8266,6 +8141,12 @@ ${result.libraries.map((lib) => `- ${lib.name}`).join("\n")}`
|
|
|
8266
8141
|
destructiveHint: false
|
|
8267
8142
|
},
|
|
8268
8143
|
async ({ library, targetVersion }) => {
|
|
8144
|
+
analytics.track(TelemetryEvent.TOOL_USED, {
|
|
8145
|
+
tool: "find_version",
|
|
8146
|
+
context: "mcp_server",
|
|
8147
|
+
library,
|
|
8148
|
+
targetVersion
|
|
8149
|
+
});
|
|
8269
8150
|
try {
|
|
8270
8151
|
const message = await tools.findVersion.execute({
|
|
8271
8152
|
library,
|
|
@@ -8295,6 +8176,11 @@ ${result.libraries.map((lib) => `- ${lib.name}`).join("\n")}`
|
|
|
8295
8176
|
destructiveHint: false
|
|
8296
8177
|
},
|
|
8297
8178
|
async ({ status }) => {
|
|
8179
|
+
analytics.track(TelemetryEvent.TOOL_USED, {
|
|
8180
|
+
tool: "list_jobs",
|
|
8181
|
+
context: "mcp_server",
|
|
8182
|
+
status
|
|
8183
|
+
});
|
|
8298
8184
|
try {
|
|
8299
8185
|
const result = await tools.listJobs.execute({
|
|
8300
8186
|
status
|
|
@@ -8333,6 +8219,11 @@ ${formattedJobs}` : "No jobs found."
|
|
|
8333
8219
|
destructiveHint: false
|
|
8334
8220
|
},
|
|
8335
8221
|
async ({ jobId }) => {
|
|
8222
|
+
analytics.track(TelemetryEvent.TOOL_USED, {
|
|
8223
|
+
tool: "get_job_info",
|
|
8224
|
+
context: "mcp_server",
|
|
8225
|
+
jobId
|
|
8226
|
+
});
|
|
8336
8227
|
try {
|
|
8337
8228
|
const result = await tools.getJobInfo.execute({ jobId });
|
|
8338
8229
|
if (!result.job) {
|
|
@@ -8367,6 +8258,11 @@ ${formattedJob}`);
|
|
|
8367
8258
|
destructiveHint: true
|
|
8368
8259
|
},
|
|
8369
8260
|
async ({ jobId }) => {
|
|
8261
|
+
analytics.track(TelemetryEvent.TOOL_USED, {
|
|
8262
|
+
tool: "cancel_job",
|
|
8263
|
+
context: "mcp_server",
|
|
8264
|
+
jobId
|
|
8265
|
+
});
|
|
8370
8266
|
try {
|
|
8371
8267
|
const result = await tools.cancelJob.execute({ jobId });
|
|
8372
8268
|
if (result.success) {
|
|
@@ -8392,6 +8288,12 @@ ${formattedJob}`);
|
|
|
8392
8288
|
destructiveHint: true
|
|
8393
8289
|
},
|
|
8394
8290
|
async ({ library, version: version2 }) => {
|
|
8291
|
+
analytics.track(TelemetryEvent.TOOL_USED, {
|
|
8292
|
+
tool: "remove_docs",
|
|
8293
|
+
context: "mcp_server",
|
|
8294
|
+
library,
|
|
8295
|
+
version: version2
|
|
8296
|
+
});
|
|
8395
8297
|
try {
|
|
8396
8298
|
const result = await tools.remove.execute({ library, version: version2 });
|
|
8397
8299
|
return createResponse(result.message);
|
|
@@ -8418,6 +8320,13 @@ ${formattedJob}`);
|
|
|
8418
8320
|
// requires internet access
|
|
8419
8321
|
},
|
|
8420
8322
|
async ({ url, followRedirects }) => {
|
|
8323
|
+
analytics.track(TelemetryEvent.TOOL_USED, {
|
|
8324
|
+
tool: "fetch_url",
|
|
8325
|
+
context: "mcp_server",
|
|
8326
|
+
url: new URL(url).hostname,
|
|
8327
|
+
// Privacy-safe URL tracking
|
|
8328
|
+
followRedirects
|
|
8329
|
+
});
|
|
8421
8330
|
try {
|
|
8422
8331
|
const result = await tools.fetchUrl.execute({ url, followRedirects });
|
|
8423
8332
|
return createResponse(result);
|
|
@@ -8677,6 +8586,18 @@ function createPipelineRouter(trpc) {
|
|
|
8677
8586
|
input.version ?? null,
|
|
8678
8587
|
input.options
|
|
8679
8588
|
);
|
|
8589
|
+
analytics.track(TelemetryEvent.WEB_SCRAPE_STARTED, {
|
|
8590
|
+
library: input.library,
|
|
8591
|
+
version: input.version || void 0,
|
|
8592
|
+
url: input.options.url,
|
|
8593
|
+
scope: input.options.scope || "subpages",
|
|
8594
|
+
maxDepth: input.options.maxDepth || 3,
|
|
8595
|
+
maxPages: input.options.maxPages || 1e3,
|
|
8596
|
+
maxConcurrency: input.options.maxConcurrency,
|
|
8597
|
+
ignoreErrors: input.options.ignoreErrors,
|
|
8598
|
+
scrapeMode: input.options.scrapeMode,
|
|
8599
|
+
hasCustomHeaders: !!(input.options.headers && Object.keys(input.options.headers).length > 0)
|
|
8600
|
+
});
|
|
8680
8601
|
return { jobId };
|
|
8681
8602
|
}
|
|
8682
8603
|
),
|
|
@@ -8760,6 +8681,13 @@ function createDataRouter(trpc) {
|
|
|
8760
8681
|
input.query,
|
|
8761
8682
|
input.limit ?? 5
|
|
8762
8683
|
);
|
|
8684
|
+
analytics.track(TelemetryEvent.WEB_SEARCH_PERFORMED, {
|
|
8685
|
+
library: input.library,
|
|
8686
|
+
version: input.version || void 0,
|
|
8687
|
+
queryLength: input.query.length,
|
|
8688
|
+
resultCount: results.length,
|
|
8689
|
+
limit: input.limit ?? 5
|
|
8690
|
+
});
|
|
8763
8691
|
return results;
|
|
8764
8692
|
}
|
|
8765
8693
|
),
|
|
@@ -10300,22 +10228,6 @@ async function registerWorkerService(pipeline) {
|
|
|
10300
10228
|
logger.debug(
|
|
10301
10229
|
`Job ${job.id} progress: ${progress.pagesScraped}/${progress.totalPages} pages`
|
|
10302
10230
|
);
|
|
10303
|
-
analytics.track(TelemetryEvent.PIPELINE_JOB_PROGRESS, {
|
|
10304
|
-
jobId: job.id,
|
|
10305
|
-
// Job IDs are already anonymous
|
|
10306
|
-
library: job.library,
|
|
10307
|
-
pagesScraped: progress.pagesScraped,
|
|
10308
|
-
totalPages: progress.totalPages,
|
|
10309
|
-
totalDiscovered: progress.totalDiscovered,
|
|
10310
|
-
progressPercent: Math.round(progress.pagesScraped / progress.totalPages * 100),
|
|
10311
|
-
currentDepth: progress.depth,
|
|
10312
|
-
maxDepth: progress.maxDepth,
|
|
10313
|
-
discoveryRatio: Math.round(
|
|
10314
|
-
progress.totalDiscovered / progress.totalPages * 100
|
|
10315
|
-
),
|
|
10316
|
-
// How much we discovered vs limited total
|
|
10317
|
-
queueEfficiency: progress.totalPages > 0 ? Math.round(progress.pagesScraped / progress.totalPages * 100) : 0
|
|
10318
|
-
});
|
|
10319
10231
|
},
|
|
10320
10232
|
onJobStatusChange: async (job) => {
|
|
10321
10233
|
logger.debug(`Job ${job.id} status changed to: ${job.status}`);
|
|
@@ -11731,9 +11643,9 @@ class DocumentStore {
|
|
|
11731
11643
|
return [...vector, ...new Array(this.dbDimension - vector.length).fill(0)];
|
|
11732
11644
|
}
|
|
11733
11645
|
/**
|
|
11734
|
-
* Initialize the embeddings client using
|
|
11735
|
-
* If no embedding config is provided (null), embeddings will not be initialized.
|
|
11736
|
-
* This allows DocumentStore to be used without embeddings for operations
|
|
11646
|
+
* Initialize the embeddings client using the provided config.
|
|
11647
|
+
* If no embedding config is provided (null or undefined), embeddings will not be initialized.
|
|
11648
|
+
* This allows DocumentStore to be used without embeddings for FTS-only operations.
|
|
11737
11649
|
*
|
|
11738
11650
|
* Environment variables per provider:
|
|
11739
11651
|
* - openai: OPENAI_API_KEY (and optionally OPENAI_API_BASE, OPENAI_ORG_ID)
|
|
@@ -11743,11 +11655,13 @@ class DocumentStore {
|
|
|
11743
11655
|
* - microsoft: Azure OpenAI credentials (AZURE_OPENAI_API_*)
|
|
11744
11656
|
*/
|
|
11745
11657
|
async initializeEmbeddings() {
|
|
11746
|
-
if (this.embeddingConfig === null) {
|
|
11747
|
-
logger.debug(
|
|
11658
|
+
if (this.embeddingConfig === null || this.embeddingConfig === void 0) {
|
|
11659
|
+
logger.debug(
|
|
11660
|
+
"Embedding initialization skipped (no config provided - FTS-only mode)"
|
|
11661
|
+
);
|
|
11748
11662
|
return;
|
|
11749
11663
|
}
|
|
11750
|
-
const config = this.embeddingConfig
|
|
11664
|
+
const config = this.embeddingConfig;
|
|
11751
11665
|
if (!areCredentialsAvailable(config.provider)) {
|
|
11752
11666
|
logger.warn(
|
|
11753
11667
|
`⚠️ No credentials found for ${config.provider} embedding provider. Vector search is disabled.
|
|
@@ -12057,7 +11971,7 @@ class DocumentStore {
|
|
|
12057
11971
|
`;
|
|
12058
11972
|
return `${header}${doc.pageContent}`;
|
|
12059
11973
|
});
|
|
12060
|
-
const maxBatchChars =
|
|
11974
|
+
const maxBatchChars = EMBEDDING_BATCH_CHARS;
|
|
12061
11975
|
const rawEmbeddings = [];
|
|
12062
11976
|
let currentBatch = [];
|
|
12063
11977
|
let currentBatchSize = 0;
|
|
@@ -12533,14 +12447,13 @@ class DocumentManagementService {
|
|
|
12533
12447
|
normalizeVersion(version2) {
|
|
12534
12448
|
return (version2 ?? "").toLowerCase();
|
|
12535
12449
|
}
|
|
12536
|
-
constructor(embeddingConfig, pipelineConfig) {
|
|
12450
|
+
constructor(embeddingConfig, pipelineConfig, storePath) {
|
|
12537
12451
|
let dbPath;
|
|
12538
12452
|
let dbDir;
|
|
12539
|
-
|
|
12540
|
-
|
|
12541
|
-
dbDir = envStorePath;
|
|
12453
|
+
if (storePath) {
|
|
12454
|
+
dbDir = storePath;
|
|
12542
12455
|
dbPath = path.join(dbDir, "documents.db");
|
|
12543
|
-
logger.debug(`Using database directory from
|
|
12456
|
+
logger.debug(`Using database directory from storePath parameter: ${dbDir}`);
|
|
12544
12457
|
} else {
|
|
12545
12458
|
const projectRoot2 = getProjectRoot();
|
|
12546
12459
|
const oldDbDir = path.join(projectRoot2, ".store");
|
|
@@ -12916,41 +12829,72 @@ async function createDocumentManagement(options = {}) {
|
|
|
12916
12829
|
await client.initialize();
|
|
12917
12830
|
return client;
|
|
12918
12831
|
}
|
|
12919
|
-
const service = new DocumentManagementService(
|
|
12832
|
+
const service = new DocumentManagementService(
|
|
12833
|
+
options.embeddingConfig,
|
|
12834
|
+
void 0,
|
|
12835
|
+
options.storePath
|
|
12836
|
+
);
|
|
12920
12837
|
await service.initialize();
|
|
12921
12838
|
return service;
|
|
12922
12839
|
}
|
|
12923
|
-
async function createLocalDocumentManagement(embeddingConfig) {
|
|
12924
|
-
const service = new DocumentManagementService(embeddingConfig);
|
|
12840
|
+
async function createLocalDocumentManagement(embeddingConfig, storePath) {
|
|
12841
|
+
const service = new DocumentManagementService(embeddingConfig, void 0, storePath);
|
|
12925
12842
|
await service.initialize();
|
|
12926
12843
|
return service;
|
|
12927
12844
|
}
|
|
12928
12845
|
function createDefaultAction(program) {
|
|
12929
12846
|
return program.addOption(
|
|
12930
|
-
new Option("--protocol <protocol>", "Protocol for MCP server").choices(["auto", "stdio", "http"])
|
|
12847
|
+
new Option("--protocol <protocol>", "Protocol for MCP server").env("DOCS_MCP_PROTOCOL").default("auto").choices(["auto", "stdio", "http"])
|
|
12931
12848
|
).addOption(
|
|
12932
|
-
new Option("--port <number>", "Port for the server").argParser((v) => {
|
|
12849
|
+
new Option("--port <number>", "Port for the server").env("DOCS_MCP_PORT").env("PORT").default(CLI_DEFAULTS.HTTP_PORT.toString()).argParser((v) => {
|
|
12933
12850
|
const n = Number(v);
|
|
12934
12851
|
if (!Number.isInteger(n) || n < 1 || n > 65535) {
|
|
12935
12852
|
throw new Error("Port must be an integer between 1 and 65535");
|
|
12936
12853
|
}
|
|
12937
12854
|
return String(n);
|
|
12938
|
-
})
|
|
12855
|
+
})
|
|
12856
|
+
).addOption(
|
|
12857
|
+
new Option("--host <host>", "Host to bind the server to").env("DOCS_MCP_HOST").env("HOST").default(CLI_DEFAULTS.HOST).argParser(validateHost)
|
|
12939
12858
|
).addOption(
|
|
12940
|
-
new Option(
|
|
12859
|
+
new Option(
|
|
12860
|
+
"--embedding-model <model>",
|
|
12861
|
+
"Embedding model configuration (e.g., 'openai:text-embedding-3-small')"
|
|
12862
|
+
).env("DOCS_MCP_EMBEDDING_MODEL")
|
|
12941
12863
|
).option("--resume", "Resume interrupted jobs on startup", false).option("--no-resume", "Do not resume jobs on startup").option(
|
|
12942
12864
|
"--read-only",
|
|
12943
12865
|
"Run in read-only mode (only expose read tools, disable write/job tools)",
|
|
12944
12866
|
false
|
|
12945
|
-
).
|
|
12946
|
-
|
|
12947
|
-
|
|
12948
|
-
|
|
12949
|
-
|
|
12950
|
-
|
|
12951
|
-
|
|
12867
|
+
).addOption(
|
|
12868
|
+
new Option(
|
|
12869
|
+
"--auth-enabled",
|
|
12870
|
+
"Enable OAuth2/OIDC authentication for MCP endpoints"
|
|
12871
|
+
).env("DOCS_MCP_AUTH_ENABLED").argParser((value) => {
|
|
12872
|
+
if (value === void 0) {
|
|
12873
|
+
return process.env.DOCS_MCP_AUTH_ENABLED === "true" || process.env.DOCS_MCP_AUTH_ENABLED === "1";
|
|
12874
|
+
}
|
|
12875
|
+
return value;
|
|
12876
|
+
}).default(false)
|
|
12877
|
+
).addOption(
|
|
12878
|
+
new Option(
|
|
12879
|
+
"--auth-issuer-url <url>",
|
|
12880
|
+
"Issuer/discovery URL for OAuth2/OIDC provider"
|
|
12881
|
+
).env("DOCS_MCP_AUTH_ISSUER_URL")
|
|
12882
|
+
).addOption(
|
|
12883
|
+
new Option(
|
|
12884
|
+
"--auth-audience <id>",
|
|
12885
|
+
"JWT audience claim (identifies this protected resource)"
|
|
12886
|
+
).env("DOCS_MCP_AUTH_AUDIENCE")
|
|
12952
12887
|
).action(
|
|
12953
12888
|
async (options) => {
|
|
12889
|
+
await analytics.track(TelemetryEvent.CLI_COMMAND, {
|
|
12890
|
+
command: "default",
|
|
12891
|
+
protocol: options.protocol,
|
|
12892
|
+
port: options.port,
|
|
12893
|
+
host: options.host,
|
|
12894
|
+
resume: options.resume,
|
|
12895
|
+
readOnly: options.readOnly,
|
|
12896
|
+
authEnabled: !!options.authEnabled
|
|
12897
|
+
});
|
|
12954
12898
|
const resolvedProtocol = resolveProtocol(options.protocol);
|
|
12955
12899
|
if (resolvedProtocol === "stdio") {
|
|
12956
12900
|
setLogLevel(LogLevel.ERROR);
|
|
@@ -12967,9 +12911,13 @@ function createDefaultAction(program) {
|
|
|
12967
12911
|
validateAuthConfig(authConfig);
|
|
12968
12912
|
warnHttpUsage(authConfig, port);
|
|
12969
12913
|
}
|
|
12914
|
+
const globalOptions = program.parent?.opts() || {};
|
|
12970
12915
|
ensurePlaywrightBrowsersInstalled();
|
|
12971
|
-
const embeddingConfig = resolveEmbeddingContext();
|
|
12972
|
-
const docService = await createLocalDocumentManagement(
|
|
12916
|
+
const embeddingConfig = resolveEmbeddingContext(options.embeddingModel);
|
|
12917
|
+
const docService = await createLocalDocumentManagement(
|
|
12918
|
+
embeddingConfig,
|
|
12919
|
+
globalOptions.storePath
|
|
12920
|
+
);
|
|
12973
12921
|
const pipelineOptions = {
|
|
12974
12922
|
recoverJobs: options.resume || false,
|
|
12975
12923
|
// Use --resume flag for job recovery
|
|
@@ -13021,6 +12969,13 @@ function createDefaultAction(program) {
|
|
|
13021
12969
|
);
|
|
13022
12970
|
}
|
|
13023
12971
|
async function fetchUrlAction(url, options) {
|
|
12972
|
+
await analytics.track(TelemetryEvent.CLI_COMMAND, {
|
|
12973
|
+
command: "fetch-url",
|
|
12974
|
+
url,
|
|
12975
|
+
scrapeMode: options.scrapeMode,
|
|
12976
|
+
followRedirects: options.followRedirects,
|
|
12977
|
+
hasHeaders: options.header.length > 0
|
|
12978
|
+
});
|
|
13024
12979
|
const headers = parseHeaders(options.header);
|
|
13025
12980
|
const fetchUrlTool = new FetchUrlTool(new HttpFetcher(), new FileFetcher());
|
|
13026
12981
|
const content = await fetchUrlTool.execute({
|
|
@@ -13057,6 +13012,12 @@ function createFetchUrlCommand(program) {
|
|
|
13057
13012
|
).action(fetchUrlAction);
|
|
13058
13013
|
}
|
|
13059
13014
|
async function findVersionAction(library, options) {
|
|
13015
|
+
await analytics.track(TelemetryEvent.CLI_COMMAND, {
|
|
13016
|
+
command: "find-version",
|
|
13017
|
+
library,
|
|
13018
|
+
version: options.version,
|
|
13019
|
+
useServerUrl: !!options.serverUrl
|
|
13020
|
+
});
|
|
13060
13021
|
const serverUrl = options.serverUrl;
|
|
13061
13022
|
const docService = await createDocumentManagement({
|
|
13062
13023
|
serverUrl,
|
|
@@ -13081,6 +13042,10 @@ function createFindVersionCommand(program) {
|
|
|
13081
13042
|
).action(findVersionAction);
|
|
13082
13043
|
}
|
|
13083
13044
|
async function listAction(options) {
|
|
13045
|
+
await analytics.track(TelemetryEvent.CLI_COMMAND, {
|
|
13046
|
+
command: "list",
|
|
13047
|
+
useServerUrl: !!options.serverUrl
|
|
13048
|
+
});
|
|
13084
13049
|
const { serverUrl } = options;
|
|
13085
13050
|
const docService = await createDocumentManagement({
|
|
13086
13051
|
serverUrl,
|
|
@@ -13102,17 +13067,22 @@ function createListCommand(program) {
|
|
|
13102
13067
|
}
|
|
13103
13068
|
function createMcpCommand(program) {
|
|
13104
13069
|
return program.command("mcp").description("Start MCP server only").addOption(
|
|
13105
|
-
new Option("--protocol <protocol>", "Protocol for MCP server").choices(["auto", "stdio", "http"])
|
|
13070
|
+
new Option("--protocol <protocol>", "Protocol for MCP server").env("DOCS_MCP_PROTOCOL").default(CLI_DEFAULTS.PROTOCOL).choices(["auto", "stdio", "http"])
|
|
13106
13071
|
).addOption(
|
|
13107
|
-
new Option("--port <number>", "Port for the MCP server").argParser((v) => {
|
|
13072
|
+
new Option("--port <number>", "Port for the MCP server").env("DOCS_MCP_PORT").env("PORT").default(CLI_DEFAULTS.HTTP_PORT.toString()).argParser((v) => {
|
|
13108
13073
|
const n = Number(v);
|
|
13109
13074
|
if (!Number.isInteger(n) || n < 1 || n > 65535) {
|
|
13110
13075
|
throw new Error("Port must be an integer between 1 and 65535");
|
|
13111
13076
|
}
|
|
13112
13077
|
return String(n);
|
|
13113
|
-
})
|
|
13078
|
+
})
|
|
13079
|
+
).addOption(
|
|
13080
|
+
new Option("--host <host>", "Host to bind the MCP server to").env("DOCS_MCP_HOST").env("HOST").default(CLI_DEFAULTS.HOST).argParser(validateHost)
|
|
13114
13081
|
).addOption(
|
|
13115
|
-
new Option(
|
|
13082
|
+
new Option(
|
|
13083
|
+
"--embedding-model <model>",
|
|
13084
|
+
"Embedding model configuration (e.g., 'openai:text-embedding-3-small')"
|
|
13085
|
+
).env("DOCS_MCP_EMBEDDING_MODEL")
|
|
13116
13086
|
).option(
|
|
13117
13087
|
"--server-url <url>",
|
|
13118
13088
|
"URL of external pipeline worker RPC (e.g., http://localhost:6280/api)"
|
|
@@ -13120,15 +13090,37 @@ function createMcpCommand(program) {
|
|
|
13120
13090
|
"--read-only",
|
|
13121
13091
|
"Run in read-only mode (only expose read tools, disable write/job tools)",
|
|
13122
13092
|
false
|
|
13123
|
-
).
|
|
13124
|
-
|
|
13125
|
-
|
|
13126
|
-
|
|
13127
|
-
|
|
13128
|
-
|
|
13129
|
-
|
|
13093
|
+
).addOption(
|
|
13094
|
+
new Option(
|
|
13095
|
+
"--auth-enabled",
|
|
13096
|
+
"Enable OAuth2/OIDC authentication for MCP endpoints"
|
|
13097
|
+
).env("DOCS_MCP_AUTH_ENABLED").argParser((value) => {
|
|
13098
|
+
if (value === void 0) {
|
|
13099
|
+
return process.env.DOCS_MCP_AUTH_ENABLED === "true" || process.env.DOCS_MCP_AUTH_ENABLED === "1";
|
|
13100
|
+
}
|
|
13101
|
+
return value;
|
|
13102
|
+
}).default(false)
|
|
13103
|
+
).addOption(
|
|
13104
|
+
new Option(
|
|
13105
|
+
"--auth-issuer-url <url>",
|
|
13106
|
+
"Issuer/discovery URL for OAuth2/OIDC provider"
|
|
13107
|
+
).env("DOCS_MCP_AUTH_ISSUER_URL")
|
|
13108
|
+
).addOption(
|
|
13109
|
+
new Option(
|
|
13110
|
+
"--auth-audience <id>",
|
|
13111
|
+
"JWT audience claim (identifies this protected resource)"
|
|
13112
|
+
).env("DOCS_MCP_AUTH_AUDIENCE")
|
|
13130
13113
|
).action(
|
|
13131
13114
|
async (cmdOptions) => {
|
|
13115
|
+
await analytics.track(TelemetryEvent.CLI_COMMAND, {
|
|
13116
|
+
command: "mcp",
|
|
13117
|
+
protocol: cmdOptions.protocol,
|
|
13118
|
+
port: cmdOptions.port,
|
|
13119
|
+
host: cmdOptions.host,
|
|
13120
|
+
useServerUrl: !!cmdOptions.serverUrl,
|
|
13121
|
+
readOnly: cmdOptions.readOnly,
|
|
13122
|
+
authEnabled: !!cmdOptions.authEnabled
|
|
13123
|
+
});
|
|
13132
13124
|
const port = validatePort(cmdOptions.port);
|
|
13133
13125
|
const host = validateHost(cmdOptions.host);
|
|
13134
13126
|
const serverUrl = cmdOptions.serverUrl;
|
|
@@ -13144,8 +13136,9 @@ function createMcpCommand(program) {
|
|
|
13144
13136
|
if (authConfig) {
|
|
13145
13137
|
validateAuthConfig(authConfig);
|
|
13146
13138
|
}
|
|
13139
|
+
const globalOptions = program.parent?.opts() || {};
|
|
13147
13140
|
try {
|
|
13148
|
-
const embeddingConfig = resolveEmbeddingContext();
|
|
13141
|
+
const embeddingConfig = resolveEmbeddingContext(cmdOptions.embeddingModel);
|
|
13149
13142
|
if (!serverUrl && !embeddingConfig) {
|
|
13150
13143
|
logger.error(
|
|
13151
13144
|
"❌ Embedding configuration is required for local mode. Configure an embedding provider with CLI options or environment variables."
|
|
@@ -13154,7 +13147,8 @@ function createMcpCommand(program) {
|
|
|
13154
13147
|
}
|
|
13155
13148
|
const docService = await createDocumentManagement({
|
|
13156
13149
|
serverUrl,
|
|
13157
|
-
embeddingConfig
|
|
13150
|
+
embeddingConfig,
|
|
13151
|
+
storePath: globalOptions.storePath
|
|
13158
13152
|
});
|
|
13159
13153
|
const pipelineOptions = {
|
|
13160
13154
|
recoverJobs: false,
|
|
@@ -13216,6 +13210,12 @@ function createMcpCommand(program) {
|
|
|
13216
13210
|
);
|
|
13217
13211
|
}
|
|
13218
13212
|
async function removeAction(library, options) {
|
|
13213
|
+
await analytics.track(TelemetryEvent.CLI_COMMAND, {
|
|
13214
|
+
command: "remove",
|
|
13215
|
+
library,
|
|
13216
|
+
version: options.version,
|
|
13217
|
+
useServerUrl: !!options.serverUrl
|
|
13218
|
+
});
|
|
13219
13219
|
const serverUrl = options.serverUrl;
|
|
13220
13220
|
const docService = await createDocumentManagement({
|
|
13221
13221
|
serverUrl,
|
|
@@ -13244,9 +13244,26 @@ function createRemoveCommand(program) {
|
|
|
13244
13244
|
"URL of external pipeline worker RPC (e.g., http://localhost:6280/api)"
|
|
13245
13245
|
).action(removeAction);
|
|
13246
13246
|
}
|
|
13247
|
-
async function scrapeAction(library, url, options) {
|
|
13247
|
+
async function scrapeAction(library, url, options, command) {
|
|
13248
|
+
await analytics.track(TelemetryEvent.CLI_COMMAND, {
|
|
13249
|
+
command: "scrape",
|
|
13250
|
+
library,
|
|
13251
|
+
version: options.version,
|
|
13252
|
+
url,
|
|
13253
|
+
maxPages: Number.parseInt(options.maxPages, 10),
|
|
13254
|
+
maxDepth: Number.parseInt(options.maxDepth, 10),
|
|
13255
|
+
maxConcurrency: Number.parseInt(options.maxConcurrency, 10),
|
|
13256
|
+
scope: options.scope,
|
|
13257
|
+
scrapeMode: options.scrapeMode,
|
|
13258
|
+
followRedirects: options.followRedirects,
|
|
13259
|
+
hasHeaders: options.header.length > 0,
|
|
13260
|
+
hasIncludePatterns: options.includePattern.length > 0,
|
|
13261
|
+
hasExcludePatterns: options.excludePattern.length > 0,
|
|
13262
|
+
useServerUrl: !!options.serverUrl
|
|
13263
|
+
});
|
|
13248
13264
|
const serverUrl = options.serverUrl;
|
|
13249
|
-
const
|
|
13265
|
+
const globalOptions = command?.parent?.opts() || {};
|
|
13266
|
+
const embeddingConfig = resolveEmbeddingContext(options.embeddingModel);
|
|
13250
13267
|
if (!serverUrl && !embeddingConfig) {
|
|
13251
13268
|
throw new Error(
|
|
13252
13269
|
"Embedding configuration is required for local scraping. Please set DOCS_MCP_EMBEDDING_MODEL environment variable or use --server-url for remote execution."
|
|
@@ -13254,7 +13271,8 @@ async function scrapeAction(library, url, options) {
|
|
|
13254
13271
|
}
|
|
13255
13272
|
const docService = await createDocumentManagement({
|
|
13256
13273
|
serverUrl,
|
|
13257
|
-
embeddingConfig
|
|
13274
|
+
embeddingConfig,
|
|
13275
|
+
storePath: globalOptions.storePath
|
|
13258
13276
|
});
|
|
13259
13277
|
let pipeline = null;
|
|
13260
13278
|
try {
|
|
@@ -13356,14 +13374,28 @@ function createScrapeCommand(program) {
|
|
|
13356
13374
|
"Custom HTTP header to send with each request (can be specified multiple times)",
|
|
13357
13375
|
(val, prev = []) => prev.concat([val]),
|
|
13358
13376
|
[]
|
|
13377
|
+
).addOption(
|
|
13378
|
+
new Option(
|
|
13379
|
+
"--embedding-model <model>",
|
|
13380
|
+
"Embedding model configuration (e.g., 'openai:text-embedding-3-small')"
|
|
13381
|
+
).env("DOCS_MCP_EMBEDDING_MODEL")
|
|
13359
13382
|
).option(
|
|
13360
13383
|
"--server-url <url>",
|
|
13361
13384
|
"URL of external pipeline worker RPC (e.g., http://localhost:6280/api)"
|
|
13362
13385
|
).action(scrapeAction);
|
|
13363
13386
|
}
|
|
13364
13387
|
async function searchAction(library, query, options) {
|
|
13388
|
+
await analytics.track(TelemetryEvent.CLI_COMMAND, {
|
|
13389
|
+
command: "search",
|
|
13390
|
+
library,
|
|
13391
|
+
version: options.version,
|
|
13392
|
+
query,
|
|
13393
|
+
limit: Number.parseInt(options.limit, 10),
|
|
13394
|
+
exactMatch: options.exactMatch,
|
|
13395
|
+
useServerUrl: !!options.serverUrl
|
|
13396
|
+
});
|
|
13365
13397
|
const serverUrl = options.serverUrl;
|
|
13366
|
-
const embeddingConfig = resolveEmbeddingContext();
|
|
13398
|
+
const embeddingConfig = resolveEmbeddingContext(options.embeddingModel);
|
|
13367
13399
|
if (!serverUrl && !embeddingConfig) {
|
|
13368
13400
|
throw new Error(
|
|
13369
13401
|
"Embedding configuration is required for local search. Please set DOCS_MCP_EMBEDDING_MODEL environment variable or use --server-url for remote execution."
|
|
@@ -13393,139 +13425,185 @@ function createSearchCommand(program) {
|
|
|
13393
13425
|
).option(
|
|
13394
13426
|
"-v, --version <string>",
|
|
13395
13427
|
"Version of the library (optional, supports ranges)"
|
|
13396
|
-
).option("-l, --limit <number>", "Maximum number of results", "5").option("-e, --exact-match", "Only use exact version match (default: false)", false).
|
|
13428
|
+
).option("-l, --limit <number>", "Maximum number of results", "5").option("-e, --exact-match", "Only use exact version match (default: false)", false).addOption(
|
|
13429
|
+
new Option(
|
|
13430
|
+
"--embedding-model <model>",
|
|
13431
|
+
"Embedding model configuration (e.g., 'openai:text-embedding-3-small')"
|
|
13432
|
+
).env("DOCS_MCP_EMBEDDING_MODEL")
|
|
13433
|
+
).option(
|
|
13397
13434
|
"--server-url <url>",
|
|
13398
13435
|
"URL of external pipeline worker RPC (e.g., http://localhost:6280/api)"
|
|
13399
13436
|
).action(searchAction);
|
|
13400
13437
|
}
|
|
13401
13438
|
function createWebCommand(program) {
|
|
13402
13439
|
return program.command("web").description("Start web interface only").addOption(
|
|
13403
|
-
new Option("--port <number>", "Port for the web interface").argParser((v) => {
|
|
13440
|
+
new Option("--port <number>", "Port for the web interface").env("DOCS_MCP_WEB_PORT").env("DOCS_MCP_PORT").env("PORT").default(CLI_DEFAULTS.WEB_PORT.toString()).argParser((v) => {
|
|
13404
13441
|
const n = Number(v);
|
|
13405
13442
|
if (!Number.isInteger(n) || n < 1 || n > 65535) {
|
|
13406
13443
|
throw new Error("Port must be an integer between 1 and 65535");
|
|
13407
13444
|
}
|
|
13408
13445
|
return String(n);
|
|
13409
|
-
})
|
|
13446
|
+
})
|
|
13410
13447
|
).addOption(
|
|
13411
|
-
new Option("--host <host>", "Host to bind the web interface to").
|
|
13448
|
+
new Option("--host <host>", "Host to bind the web interface to").env("DOCS_MCP_HOST").env("HOST").default(CLI_DEFAULTS.HOST).argParser(validateHost)
|
|
13449
|
+
).addOption(
|
|
13450
|
+
new Option(
|
|
13451
|
+
"--embedding-model <model>",
|
|
13452
|
+
"Embedding model configuration (e.g., 'openai:text-embedding-3-small')"
|
|
13453
|
+
).env("DOCS_MCP_EMBEDDING_MODEL")
|
|
13412
13454
|
).option(
|
|
13413
13455
|
"--server-url <url>",
|
|
13414
13456
|
"URL of external pipeline worker RPC (e.g., http://localhost:6280/api)"
|
|
13415
|
-
).action(
|
|
13416
|
-
|
|
13417
|
-
|
|
13418
|
-
|
|
13419
|
-
|
|
13420
|
-
|
|
13421
|
-
|
|
13422
|
-
|
|
13423
|
-
|
|
13457
|
+
).action(
|
|
13458
|
+
async (cmdOptions) => {
|
|
13459
|
+
await analytics.track(TelemetryEvent.CLI_COMMAND, {
|
|
13460
|
+
command: "web",
|
|
13461
|
+
port: cmdOptions.port,
|
|
13462
|
+
host: cmdOptions.host,
|
|
13463
|
+
useServerUrl: !!cmdOptions.serverUrl
|
|
13464
|
+
});
|
|
13465
|
+
const port = validatePort(cmdOptions.port);
|
|
13466
|
+
const host = validateHost(cmdOptions.host);
|
|
13467
|
+
const serverUrl = cmdOptions.serverUrl;
|
|
13468
|
+
try {
|
|
13469
|
+
const embeddingConfig = resolveEmbeddingContext(cmdOptions.embeddingModel);
|
|
13470
|
+
if (!serverUrl && !embeddingConfig) {
|
|
13471
|
+
logger.error(
|
|
13472
|
+
"❌ Embedding configuration is required for local mode. Configure an embedding provider with CLI options or environment variables."
|
|
13473
|
+
);
|
|
13474
|
+
process.exit(1);
|
|
13475
|
+
}
|
|
13476
|
+
const docService = await createDocumentManagement({
|
|
13477
|
+
serverUrl,
|
|
13478
|
+
embeddingConfig
|
|
13479
|
+
});
|
|
13480
|
+
const pipelineOptions = {
|
|
13481
|
+
recoverJobs: false,
|
|
13482
|
+
// Web command doesn't support job recovery
|
|
13483
|
+
serverUrl,
|
|
13484
|
+
concurrency: 3
|
|
13485
|
+
};
|
|
13486
|
+
const pipeline = await createPipelineWithCallbacks(
|
|
13487
|
+
serverUrl ? void 0 : docService,
|
|
13488
|
+
pipelineOptions
|
|
13489
|
+
);
|
|
13490
|
+
const config = createAppServerConfig({
|
|
13491
|
+
enableWebInterface: true,
|
|
13492
|
+
enableMcpServer: false,
|
|
13493
|
+
enableApiServer: false,
|
|
13494
|
+
enableWorker: !serverUrl,
|
|
13495
|
+
port,
|
|
13496
|
+
host,
|
|
13497
|
+
externalWorkerUrl: serverUrl,
|
|
13498
|
+
startupContext: {
|
|
13499
|
+
cliCommand: "web"
|
|
13500
|
+
}
|
|
13501
|
+
});
|
|
13502
|
+
logger.info(
|
|
13503
|
+
`🚀 Starting web interface${serverUrl ? ` connecting to worker at ${serverUrl}` : ""}`
|
|
13424
13504
|
);
|
|
13505
|
+
const appServer = await startAppServer(docService, pipeline, config);
|
|
13506
|
+
registerGlobalServices({
|
|
13507
|
+
appServer,
|
|
13508
|
+
docService
|
|
13509
|
+
// pipeline is owned by AppServer - don't register globally to avoid double shutdown
|
|
13510
|
+
});
|
|
13511
|
+
await new Promise(() => {
|
|
13512
|
+
});
|
|
13513
|
+
} catch (error) {
|
|
13514
|
+
logger.error(`❌ Failed to start web interface: ${error}`);
|
|
13425
13515
|
process.exit(1);
|
|
13426
13516
|
}
|
|
13427
|
-
const docService = await createDocumentManagement({
|
|
13428
|
-
serverUrl,
|
|
13429
|
-
embeddingConfig
|
|
13430
|
-
});
|
|
13431
|
-
const pipelineOptions = {
|
|
13432
|
-
recoverJobs: false,
|
|
13433
|
-
// Web command doesn't support job recovery
|
|
13434
|
-
serverUrl,
|
|
13435
|
-
concurrency: 3
|
|
13436
|
-
};
|
|
13437
|
-
const pipeline = await createPipelineWithCallbacks(
|
|
13438
|
-
serverUrl ? void 0 : docService,
|
|
13439
|
-
pipelineOptions
|
|
13440
|
-
);
|
|
13441
|
-
const config = createAppServerConfig({
|
|
13442
|
-
enableWebInterface: true,
|
|
13443
|
-
enableMcpServer: false,
|
|
13444
|
-
enableApiServer: false,
|
|
13445
|
-
enableWorker: !serverUrl,
|
|
13446
|
-
port,
|
|
13447
|
-
host,
|
|
13448
|
-
externalWorkerUrl: serverUrl,
|
|
13449
|
-
startupContext: {
|
|
13450
|
-
cliCommand: "web"
|
|
13451
|
-
}
|
|
13452
|
-
});
|
|
13453
|
-
logger.info(
|
|
13454
|
-
`🚀 Starting web interface${serverUrl ? ` connecting to worker at ${serverUrl}` : ""}`
|
|
13455
|
-
);
|
|
13456
|
-
const appServer = await startAppServer(docService, pipeline, config);
|
|
13457
|
-
registerGlobalServices({
|
|
13458
|
-
appServer,
|
|
13459
|
-
docService
|
|
13460
|
-
// pipeline is owned by AppServer - don't register globally to avoid double shutdown
|
|
13461
|
-
});
|
|
13462
|
-
await new Promise(() => {
|
|
13463
|
-
});
|
|
13464
|
-
} catch (error) {
|
|
13465
|
-
logger.error(`❌ Failed to start web interface: ${error}`);
|
|
13466
|
-
process.exit(1);
|
|
13467
13517
|
}
|
|
13468
|
-
|
|
13518
|
+
);
|
|
13469
13519
|
}
|
|
13470
13520
|
function createWorkerCommand(program) {
|
|
13471
13521
|
return program.command("worker").description("Start external pipeline worker (HTTP API)").addOption(
|
|
13472
|
-
new Option("--port <number>", "Port for worker API").argParser((v) => {
|
|
13522
|
+
new Option("--port <number>", "Port for worker API").env("DOCS_MCP_PORT").env("PORT").default("8080").argParser((v) => {
|
|
13473
13523
|
const n = Number(v);
|
|
13474
13524
|
if (!Number.isInteger(n) || n < 1 || n > 65535) {
|
|
13475
13525
|
throw new Error("Port must be an integer between 1 and 65535");
|
|
13476
13526
|
}
|
|
13477
13527
|
return String(n);
|
|
13478
|
-
})
|
|
13528
|
+
})
|
|
13479
13529
|
).addOption(
|
|
13480
|
-
new Option("--host <host>", "Host to bind the worker API to").
|
|
13481
|
-
).
|
|
13482
|
-
|
|
13483
|
-
|
|
13484
|
-
|
|
13485
|
-
|
|
13486
|
-
|
|
13487
|
-
|
|
13488
|
-
|
|
13489
|
-
|
|
13490
|
-
|
|
13491
|
-
|
|
13492
|
-
|
|
13493
|
-
};
|
|
13494
|
-
const pipeline = await createPipelineWithCallbacks(docService, pipelineOptions);
|
|
13495
|
-
const config = createAppServerConfig({
|
|
13496
|
-
enableWebInterface: false,
|
|
13497
|
-
enableMcpServer: false,
|
|
13498
|
-
enableApiServer: true,
|
|
13499
|
-
enableWorker: true,
|
|
13500
|
-
port,
|
|
13501
|
-
host,
|
|
13502
|
-
startupContext: {
|
|
13503
|
-
cliCommand: "worker"
|
|
13504
|
-
}
|
|
13505
|
-
});
|
|
13506
|
-
const appServer = await startAppServer(docService, pipeline, config);
|
|
13507
|
-
registerGlobalServices({
|
|
13508
|
-
appServer,
|
|
13509
|
-
docService
|
|
13510
|
-
// pipeline is owned by AppServer - don't register globally to avoid double shutdown
|
|
13511
|
-
});
|
|
13512
|
-
await new Promise(() => {
|
|
13530
|
+
new Option("--host <host>", "Host to bind the worker API to").env("DOCS_MCP_HOST").env("HOST").default(CLI_DEFAULTS.HOST).argParser(validateHost)
|
|
13531
|
+
).addOption(
|
|
13532
|
+
new Option(
|
|
13533
|
+
"--embedding-model <model>",
|
|
13534
|
+
"Embedding model configuration (e.g., 'openai:text-embedding-3-small')"
|
|
13535
|
+
).env("DOCS_MCP_EMBEDDING_MODEL")
|
|
13536
|
+
).option("--resume", "Resume interrupted jobs on startup", true).option("--no-resume", "Do not resume jobs on startup").action(
|
|
13537
|
+
async (cmdOptions) => {
|
|
13538
|
+
await analytics.track(TelemetryEvent.CLI_COMMAND, {
|
|
13539
|
+
command: "worker",
|
|
13540
|
+
port: cmdOptions.port,
|
|
13541
|
+
host: cmdOptions.host,
|
|
13542
|
+
resume: cmdOptions.resume
|
|
13513
13543
|
});
|
|
13514
|
-
|
|
13515
|
-
|
|
13516
|
-
|
|
13544
|
+
const port = validatePort(cmdOptions.port);
|
|
13545
|
+
const host = validateHost(cmdOptions.host);
|
|
13546
|
+
try {
|
|
13547
|
+
logger.info(`🚀 Starting external pipeline worker on port ${port}`);
|
|
13548
|
+
ensurePlaywrightBrowsersInstalled();
|
|
13549
|
+
const embeddingConfig = resolveEmbeddingContext(cmdOptions.embeddingModel);
|
|
13550
|
+
const docService = await createLocalDocumentManagement(embeddingConfig);
|
|
13551
|
+
const pipelineOptions = {
|
|
13552
|
+
recoverJobs: cmdOptions.resume,
|
|
13553
|
+
// Use the resume option
|
|
13554
|
+
concurrency: CLI_DEFAULTS.MAX_CONCURRENCY
|
|
13555
|
+
};
|
|
13556
|
+
const pipeline = await createPipelineWithCallbacks(docService, pipelineOptions);
|
|
13557
|
+
const config = createAppServerConfig({
|
|
13558
|
+
enableWebInterface: false,
|
|
13559
|
+
enableMcpServer: false,
|
|
13560
|
+
enableApiServer: true,
|
|
13561
|
+
enableWorker: true,
|
|
13562
|
+
port,
|
|
13563
|
+
host,
|
|
13564
|
+
startupContext: {
|
|
13565
|
+
cliCommand: "worker"
|
|
13566
|
+
}
|
|
13567
|
+
});
|
|
13568
|
+
const appServer = await startAppServer(docService, pipeline, config);
|
|
13569
|
+
registerGlobalServices({
|
|
13570
|
+
appServer,
|
|
13571
|
+
docService
|
|
13572
|
+
// pipeline is owned by AppServer - don't register globally to avoid double shutdown
|
|
13573
|
+
});
|
|
13574
|
+
await new Promise(() => {
|
|
13575
|
+
});
|
|
13576
|
+
} catch (error) {
|
|
13577
|
+
logger.error(`❌ Failed to start external pipeline worker: ${error}`);
|
|
13578
|
+
process.exit(1);
|
|
13579
|
+
}
|
|
13517
13580
|
}
|
|
13518
|
-
|
|
13581
|
+
);
|
|
13519
13582
|
}
|
|
13520
13583
|
function createCliProgram() {
|
|
13521
13584
|
const program = new Command();
|
|
13522
13585
|
const commandStartTimes = /* @__PURE__ */ new Map();
|
|
13523
13586
|
program.name("docs-mcp-server").description("Unified CLI, MCP Server, and Web Interface for Docs MCP Server.").version(packageJson.version).addOption(
|
|
13524
13587
|
new Option("--verbose", "Enable verbose (debug) logging").conflicts("silent")
|
|
13525
|
-
).addOption(new Option("--silent", "Disable all logging except errors")).addOption(
|
|
13588
|
+
).addOption(new Option("--silent", "Disable all logging except errors")).addOption(
|
|
13589
|
+
new Option("--telemetry", "Enable telemetry collection").env("DOCS_MCP_TELEMETRY").argParser((value) => {
|
|
13590
|
+
if (value === void 0) {
|
|
13591
|
+
return process.env.DOCS_MCP_TELEMETRY !== "false" && process.env.DOCS_MCP_TELEMETRY !== "0";
|
|
13592
|
+
}
|
|
13593
|
+
return value;
|
|
13594
|
+
}).default(true)
|
|
13595
|
+
).addOption(new Option("--no-telemetry", "Disable telemetry collection")).addOption(
|
|
13596
|
+
new Option("--store-path <path>", "Custom path for data storage directory").env(
|
|
13597
|
+
"DOCS_MCP_STORE_PATH"
|
|
13598
|
+
)
|
|
13599
|
+
).enablePositionalOptions().allowExcessArguments(false).showHelpAfterError(true);
|
|
13526
13600
|
program.hook("preAction", async (thisCommand, actionCommand) => {
|
|
13527
13601
|
const globalOptions = thisCommand.opts();
|
|
13528
13602
|
setupLogging(globalOptions);
|
|
13603
|
+
initTelemetry({
|
|
13604
|
+
enabled: globalOptions.telemetry ?? true,
|
|
13605
|
+
storePath: globalOptions.storePath
|
|
13606
|
+
});
|
|
13529
13607
|
if (shouldEnableTelemetry()) {
|
|
13530
13608
|
if (analytics.isEnabled()) {
|
|
13531
13609
|
analytics.setGlobalContext({
|
|
@@ -13539,8 +13617,6 @@ function createCliProgram() {
|
|
|
13539
13617
|
commandStartTimes.set(commandKey, Date.now());
|
|
13540
13618
|
actionCommand._trackingKey = commandKey;
|
|
13541
13619
|
}
|
|
13542
|
-
} else {
|
|
13543
|
-
TelemetryConfig.getInstance().disable();
|
|
13544
13620
|
}
|
|
13545
13621
|
});
|
|
13546
13622
|
program.hook("postAction", async (_thisCommand, actionCommand) => {
|