@arabold/docs-mcp-server 1.25.0 → 1.25.2
This diff shows the changes between publicly released versions of the package as they appear in their respective public registries. It is provided for informational purposes only.
- package/README.md +34 -0
- package/dist/index.js +863 -694
- package/dist/index.js.map +1 -1
- package/package.json +1 -1
package/dist/index.js
CHANGED
@@ -541,32 +541,15 @@ class PostHogClient {
|
|
|
541
541
|
}
|
|
542
542
|
class TelemetryConfig {
|
|
543
543
|
static instance;
|
|
544
|
-
enabled;
|
|
544
|
+
enabled = true;
|
|
545
|
+
// Default to enabled
|
|
545
546
|
constructor() {
|
|
546
|
-
this.enabled = this.determineEnabledState();
|
|
547
|
-
}
|
|
548
|
-
/**
|
|
549
|
-
* Determines if telemetry should be enabled based on CLI flags and environment variables.
|
|
550
|
-
* Priority: CLI flags > environment variables > default (true)
|
|
551
|
-
*/
|
|
552
|
-
determineEnabledState() {
|
|
553
|
-
if (process.env.DOCS_MCP_TELEMETRY === "false") {
|
|
554
|
-
return false;
|
|
555
|
-
}
|
|
556
|
-
const args = process.argv;
|
|
557
|
-
if (args.includes("--no-telemetry")) {
|
|
558
|
-
return false;
|
|
559
|
-
}
|
|
560
|
-
return true;
|
|
561
547
|
}
|
|
562
548
|
isEnabled() {
|
|
563
549
|
return this.enabled;
|
|
564
550
|
}
|
|
565
|
-
|
|
566
|
-
this.enabled =
|
|
567
|
-
}
|
|
568
|
-
enable() {
|
|
569
|
-
this.enabled = true;
|
|
551
|
+
setEnabled(enabled) {
|
|
552
|
+
this.enabled = enabled;
|
|
570
553
|
}
|
|
571
554
|
static getInstance() {
|
|
572
555
|
if (!TelemetryConfig.instance) {
|
|
@@ -575,10 +558,9 @@ class TelemetryConfig {
|
|
|
575
558
|
return TelemetryConfig.instance;
|
|
576
559
|
}
|
|
577
560
|
}
|
|
578
|
-
function generateInstallationId() {
|
|
561
|
+
function generateInstallationId(storePath) {
|
|
579
562
|
try {
|
|
580
|
-
const
|
|
581
|
-
const dataDir = envStorePath || envPaths("docs-mcp-server", { suffix: "" }).data;
|
|
563
|
+
const dataDir = storePath || envPaths("docs-mcp-server", { suffix: "" }).data;
|
|
582
564
|
const installationIdPath = path.join(dataDir, "installation.id");
|
|
583
565
|
if (fs.existsSync(installationIdPath)) {
|
|
584
566
|
const existingId = fs.readFileSync(installationIdPath, "utf8").trim();
|
|
@@ -602,10 +584,10 @@ var TelemetryEvent = /* @__PURE__ */ ((TelemetryEvent2) => {
|
|
|
602
584
|
TelemetryEvent2["APP_SHUTDOWN"] = "app_shutdown";
|
|
603
585
|
TelemetryEvent2["CLI_COMMAND"] = "cli_command";
|
|
604
586
|
TelemetryEvent2["TOOL_USED"] = "tool_used";
|
|
605
|
-
TelemetryEvent2["HTTP_REQUEST_COMPLETED"] = "http_request_completed";
|
|
606
|
-
TelemetryEvent2["PIPELINE_JOB_PROGRESS"] = "pipeline_job_progress";
|
|
607
587
|
TelemetryEvent2["PIPELINE_JOB_COMPLETED"] = "pipeline_job_completed";
|
|
608
588
|
TelemetryEvent2["DOCUMENT_PROCESSED"] = "document_processed";
|
|
589
|
+
TelemetryEvent2["WEB_SEARCH_PERFORMED"] = "web_search_performed";
|
|
590
|
+
TelemetryEvent2["WEB_SCRAPE_STARTED"] = "web_scrape_started";
|
|
609
591
|
return TelemetryEvent2;
|
|
610
592
|
})(TelemetryEvent || {});
|
|
611
593
|
class Analytics {
|
|
@@ -623,6 +605,8 @@ class Analytics {
|
|
|
623
605
|
const analytics2 = new Analytics(shouldEnable);
|
|
624
606
|
if (analytics2.isEnabled()) {
|
|
625
607
|
logger.debug("Analytics enabled");
|
|
608
|
+
} else if (!config.isEnabled()) {
|
|
609
|
+
logger.debug("Analytics disabled (user preference)");
|
|
626
610
|
} else {
|
|
627
611
|
logger.debug("Analytics disabled");
|
|
628
612
|
}
|
|
@@ -682,38 +666,29 @@ class Analytics {
|
|
|
682
666
|
isEnabled() {
|
|
683
667
|
return this.enabled;
|
|
684
668
|
}
|
|
685
|
-
|
|
686
|
-
|
|
687
|
-
|
|
688
|
-
|
|
689
|
-
|
|
690
|
-
try {
|
|
691
|
-
const result = await operation();
|
|
692
|
-
this.track("tool_used", {
|
|
693
|
-
tool: toolName,
|
|
694
|
-
success: true,
|
|
695
|
-
durationMs: Date.now() - startTime,
|
|
696
|
-
...getProperties ? getProperties(result) : {}
|
|
697
|
-
});
|
|
698
|
-
return result;
|
|
699
|
-
} catch (error) {
|
|
700
|
-
this.track("tool_used", {
|
|
701
|
-
tool: toolName,
|
|
702
|
-
success: false,
|
|
703
|
-
durationMs: Date.now() - startTime
|
|
704
|
-
});
|
|
705
|
-
if (error instanceof Error) {
|
|
706
|
-
this.captureException(error, {
|
|
707
|
-
tool: toolName,
|
|
708
|
-
context: "tool_execution",
|
|
709
|
-
durationMs: Date.now() - startTime
|
|
710
|
-
});
|
|
711
|
-
}
|
|
712
|
-
throw error;
|
|
713
|
-
}
|
|
669
|
+
}
|
|
670
|
+
let analyticsInstance = null;
|
|
671
|
+
function getAnalytics() {
|
|
672
|
+
if (!analyticsInstance) {
|
|
673
|
+
analyticsInstance = Analytics.create();
|
|
714
674
|
}
|
|
675
|
+
return analyticsInstance;
|
|
715
676
|
}
|
|
716
|
-
|
|
677
|
+
function initTelemetry(options) {
|
|
678
|
+
TelemetryConfig.getInstance().setEnabled(options.enabled);
|
|
679
|
+
generateInstallationId(options.storePath);
|
|
680
|
+
analyticsInstance = Analytics.create();
|
|
681
|
+
}
|
|
682
|
+
const analytics = new Proxy({}, {
|
|
683
|
+
get(target, prop) {
|
|
684
|
+
if (!target.isEnabled) {
|
|
685
|
+
const instance = getAnalytics();
|
|
686
|
+
Object.setPrototypeOf(target, Object.getPrototypeOf(instance));
|
|
687
|
+
Object.assign(target, instance);
|
|
688
|
+
}
|
|
689
|
+
return target[prop];
|
|
690
|
+
}
|
|
691
|
+
});
|
|
717
692
|
function extractHostname(url) {
|
|
718
693
|
try {
|
|
719
694
|
const parsed = new URL(url);
|
|
@@ -734,7 +709,7 @@ function extractProtocol(urlOrPath) {
|
|
|
734
709
|
}
|
|
735
710
|
}
|
|
736
711
|
const name = "@arabold/docs-mcp-server";
|
|
737
|
-
const version = "1.
|
|
712
|
+
const version = "1.25.1";
|
|
738
713
|
const description = "MCP server for fetching and searching documentation";
|
|
739
714
|
const type = "module";
|
|
740
715
|
const bin = { "docs-mcp-server": "dist/index.js" };
|
|
@@ -4839,6 +4814,98 @@ class MarkdownMetadataExtractorMiddleware {
|
|
|
4839
4814
|
await next();
|
|
4840
4815
|
}
|
|
4841
4816
|
}
|
|
4817
|
+
class HtmlNormalizationMiddleware {
|
|
4818
|
+
async process(context, next) {
|
|
4819
|
+
if (!context.dom) {
|
|
4820
|
+
logger.debug(
|
|
4821
|
+
`Skipping HTML normalization for ${context.source} - no DOM available`
|
|
4822
|
+
);
|
|
4823
|
+
await next();
|
|
4824
|
+
return;
|
|
4825
|
+
}
|
|
4826
|
+
try {
|
|
4827
|
+
logger.debug(`Normalizing HTML URLs and links for ${context.source}`);
|
|
4828
|
+
const $ = context.dom;
|
|
4829
|
+
const baseUrl = context.source;
|
|
4830
|
+
this.normalizeImageUrls($, baseUrl);
|
|
4831
|
+
this.normalizeLinks($, baseUrl);
|
|
4832
|
+
logger.debug(`Successfully normalized HTML content for ${context.source}`);
|
|
4833
|
+
} catch (error) {
|
|
4834
|
+
logger.error(`❌ Failed to normalize HTML for ${context.source}: ${error}`);
|
|
4835
|
+
context.errors.push(
|
|
4836
|
+
error instanceof Error ? error : new Error(`HTML normalization failed: ${String(error)}`)
|
|
4837
|
+
);
|
|
4838
|
+
}
|
|
4839
|
+
await next();
|
|
4840
|
+
}
|
|
4841
|
+
/**
|
|
4842
|
+
* Normalizes image URLs by converting relative URLs to absolute URLs.
|
|
4843
|
+
*/
|
|
4844
|
+
normalizeImageUrls($, baseUrl) {
|
|
4845
|
+
$("img").each((_index, element) => {
|
|
4846
|
+
const $img = $(element);
|
|
4847
|
+
const src = $img.attr("src");
|
|
4848
|
+
if (!src) return;
|
|
4849
|
+
try {
|
|
4850
|
+
new URL(src);
|
|
4851
|
+
} catch {
|
|
4852
|
+
try {
|
|
4853
|
+
const absoluteUrl = new URL(src, baseUrl).href;
|
|
4854
|
+
$img.attr("src", absoluteUrl);
|
|
4855
|
+
logger.debug(`Converted relative image URL: ${src} → ${absoluteUrl}`);
|
|
4856
|
+
} catch (error) {
|
|
4857
|
+
logger.debug(`Failed to resolve relative image URL: ${src} - ${error}`);
|
|
4858
|
+
}
|
|
4859
|
+
}
|
|
4860
|
+
});
|
|
4861
|
+
}
|
|
4862
|
+
/**
|
|
4863
|
+
* Normalizes links by:
|
|
4864
|
+
* - Converting relative URLs to absolute URLs
|
|
4865
|
+
* - Unwrapping anchor links (preserving text content)
|
|
4866
|
+
* - Unwrapping non-HTTP links (preserving text content)
|
|
4867
|
+
*/
|
|
4868
|
+
normalizeLinks($, baseUrl) {
|
|
4869
|
+
$("a").each((_index, element) => {
|
|
4870
|
+
const $link = $(element);
|
|
4871
|
+
const href = $link.attr("href");
|
|
4872
|
+
if (!href) {
|
|
4873
|
+
this.unwrapElement($, $link);
|
|
4874
|
+
return;
|
|
4875
|
+
}
|
|
4876
|
+
if (href.startsWith("#")) {
|
|
4877
|
+
logger.debug(`Removing anchor link: ${href}`);
|
|
4878
|
+
this.unwrapElement($, $link);
|
|
4879
|
+
return;
|
|
4880
|
+
}
|
|
4881
|
+
try {
|
|
4882
|
+
const url = new URL(href);
|
|
4883
|
+
if (url.protocol !== "http:" && url.protocol !== "https:") {
|
|
4884
|
+
logger.debug(`Removing non-HTTP link: ${href}`);
|
|
4885
|
+
this.unwrapElement($, $link);
|
|
4886
|
+
return;
|
|
4887
|
+
}
|
|
4888
|
+
} catch {
|
|
4889
|
+
try {
|
|
4890
|
+
const absoluteUrl = new URL(href, baseUrl).href;
|
|
4891
|
+
$link.attr("href", absoluteUrl);
|
|
4892
|
+
logger.debug(`Converted relative link URL: ${href} → ${absoluteUrl}`);
|
|
4893
|
+
} catch (error) {
|
|
4894
|
+
logger.debug(`Failed to resolve relative link URL: ${href} - ${error}`);
|
|
4895
|
+
this.unwrapElement($, $link);
|
|
4896
|
+
}
|
|
4897
|
+
}
|
|
4898
|
+
});
|
|
4899
|
+
}
|
|
4900
|
+
/**
|
|
4901
|
+
* Unwraps an element by replacing it with its HTML content.
|
|
4902
|
+
* This preserves the inner HTML (including nested elements) while removing the wrapping tag.
|
|
4903
|
+
*/
|
|
4904
|
+
unwrapElement(_$, $element) {
|
|
4905
|
+
const htmlContent = $element.html() || $element.text();
|
|
4906
|
+
$element.replaceWith(htmlContent);
|
|
4907
|
+
}
|
|
4908
|
+
}
|
|
4842
4909
|
function detectCharsetFromHtml(htmlContent) {
|
|
4843
4910
|
const charsetMatch = htmlContent.match(
|
|
4844
4911
|
/<meta\s+charset\s*=\s*["']?([^"'>\s]+)["']?[^>]*>/i
|
|
@@ -4962,6 +5029,7 @@ class HtmlPipeline extends BasePipeline {
|
|
|
4962
5029
|
new HtmlMetadataExtractorMiddleware(),
|
|
4963
5030
|
new HtmlLinkExtractorMiddleware(),
|
|
4964
5031
|
new HtmlSanitizerMiddleware(),
|
|
5032
|
+
new HtmlNormalizationMiddleware(),
|
|
4965
5033
|
new HtmlToMarkdownMiddleware()
|
|
4966
5034
|
];
|
|
4967
5035
|
const semanticSplitter = new SemanticMarkdownSplitter(
|
|
@@ -7118,7 +7186,7 @@ class EmbeddingConfig {
|
|
|
7118
7186
|
}
|
|
7119
7187
|
}
|
|
7120
7188
|
/**
|
|
7121
|
-
* Parse embedding model configuration from
|
|
7189
|
+
* Parse embedding model configuration from a provided model specification.
|
|
7122
7190
|
* This is a synchronous operation that extracts provider, model, and known dimensions.
|
|
7123
7191
|
*
|
|
7124
7192
|
* Supports various providers:
|
|
@@ -7129,11 +7197,11 @@ class EmbeddingConfig {
|
|
|
7129
7197
|
* - microsoft: Azure OpenAI
|
|
7130
7198
|
* - sagemaker: AWS SageMaker hosted models
|
|
7131
7199
|
*
|
|
7132
|
-
* @param modelSpec
|
|
7200
|
+
* @param modelSpec Model specification (e.g., "openai:text-embedding-3-small"), defaults to "text-embedding-3-small"
|
|
7133
7201
|
* @returns Parsed embedding model configuration
|
|
7134
7202
|
*/
|
|
7135
7203
|
parse(modelSpec) {
|
|
7136
|
-
const spec = modelSpec ||
|
|
7204
|
+
const spec = modelSpec || "text-embedding-3-small";
|
|
7137
7205
|
const colonIndex = spec.indexOf(":");
|
|
7138
7206
|
let provider;
|
|
7139
7207
|
let model;
|
|
@@ -7331,16 +7399,13 @@ const CLI_DEFAULTS = {
|
|
|
7331
7399
|
TELEMETRY: true
|
|
7332
7400
|
};
|
|
7333
7401
|
function parseAuthConfig(options) {
|
|
7334
|
-
|
|
7335
|
-
if (!enabled) {
|
|
7402
|
+
if (!options.authEnabled) {
|
|
7336
7403
|
return void 0;
|
|
7337
7404
|
}
|
|
7338
|
-
const issuerUrl = options.authIssuerUrl ?? process.env.DOCS_MCP_AUTH_ISSUER_URL;
|
|
7339
|
-
const audience = options.authAudience ?? process.env.DOCS_MCP_AUTH_AUDIENCE;
|
|
7340
7405
|
return {
|
|
7341
|
-
enabled,
|
|
7342
|
-
issuerUrl,
|
|
7343
|
-
audience,
|
|
7406
|
+
enabled: true,
|
|
7407
|
+
issuerUrl: options.authIssuerUrl,
|
|
7408
|
+
audience: options.authAudience,
|
|
7344
7409
|
scopes: ["openid", "profile"]
|
|
7345
7410
|
// Default scopes for OAuth2/OIDC
|
|
7346
7411
|
};
|
|
@@ -7405,12 +7470,23 @@ function warnHttpUsage(authConfig, port) {
|
|
|
7405
7470
|
);
|
|
7406
7471
|
}
|
|
7407
7472
|
}
|
|
7408
|
-
function resolveEmbeddingContext(
|
|
7473
|
+
function resolveEmbeddingContext(embeddingModel) {
|
|
7409
7474
|
try {
|
|
7410
|
-
|
|
7411
|
-
|
|
7412
|
-
|
|
7413
|
-
|
|
7475
|
+
let modelSpec = embeddingModel;
|
|
7476
|
+
if (!modelSpec && process.env.OPENAI_API_KEY) {
|
|
7477
|
+
modelSpec = "text-embedding-3-small";
|
|
7478
|
+
logger.debug(
|
|
7479
|
+
"Using default OpenAI embedding model due to OPENAI_API_KEY presence."
|
|
7480
|
+
);
|
|
7481
|
+
}
|
|
7482
|
+
if (!modelSpec) {
|
|
7483
|
+
logger.debug(
|
|
7484
|
+
"No embedding model specified and OPENAI_API_KEY not found. Embeddings are disabled."
|
|
7485
|
+
);
|
|
7486
|
+
return null;
|
|
7487
|
+
}
|
|
7488
|
+
logger.debug(`Resolving embedding configuration for model: ${modelSpec}`);
|
|
7489
|
+
return EmbeddingConfig.parseEmbeddingConfig(modelSpec);
|
|
7414
7490
|
} catch (error) {
|
|
7415
7491
|
logger.debug(`Failed to resolve embedding configuration: ${error}`);
|
|
7416
7492
|
return null;
|
|
@@ -7431,55 +7507,42 @@ class CancelJobTool {
|
|
|
7431
7507
|
* @returns A promise that resolves with the outcome message.
|
|
7432
7508
|
*/
|
|
7433
7509
|
async execute(input) {
|
|
7434
|
-
|
|
7435
|
-
|
|
7436
|
-
|
|
7437
|
-
|
|
7438
|
-
const job = await this.pipeline.getJob(input.jobId);
|
|
7439
|
-
if (!job) {
|
|
7440
|
-
logger.warn(`❓ [CancelJobTool] Job not found: ${input.jobId}`);
|
|
7441
|
-
return {
|
|
7442
|
-
message: `Job with ID ${input.jobId} not found.`,
|
|
7443
|
-
success: false
|
|
7444
|
-
};
|
|
7445
|
-
}
|
|
7446
|
-
if (job.status === PipelineJobStatus.COMPLETED || // Use enum member
|
|
7447
|
-
job.status === PipelineJobStatus.FAILED || // Use enum member
|
|
7448
|
-
job.status === PipelineJobStatus.CANCELLED) {
|
|
7449
|
-
logger.debug(
|
|
7450
|
-
`Job ${input.jobId} is already in a final state: ${job.status}.`
|
|
7451
|
-
);
|
|
7452
|
-
return {
|
|
7453
|
-
message: `Job ${input.jobId} is already ${job.status}. No action taken.`,
|
|
7454
|
-
success: true
|
|
7455
|
-
// Considered success as no cancellation needed
|
|
7456
|
-
};
|
|
7457
|
-
}
|
|
7458
|
-
await this.pipeline.cancelJob(input.jobId);
|
|
7459
|
-
const updatedJob = await this.pipeline.getJob(input.jobId);
|
|
7460
|
-
const finalStatus = updatedJob?.status ?? "UNKNOWN (job disappeared?)";
|
|
7461
|
-
logger.debug(
|
|
7462
|
-
`Cancellation requested for job ${input.jobId}. Current status: ${finalStatus}`
|
|
7463
|
-
);
|
|
7464
|
-
return {
|
|
7465
|
-
message: `Cancellation requested for job ${input.jobId}. Current status: ${finalStatus}.`,
|
|
7466
|
-
success: true
|
|
7467
|
-
};
|
|
7468
|
-
} catch (error) {
|
|
7469
|
-
logger.error(`❌ Error cancelling job ${input.jobId}: ${error}`);
|
|
7470
|
-
return {
|
|
7471
|
-
message: `Failed to cancel job ${input.jobId}: ${error instanceof Error ? error.message : String(error)}`,
|
|
7472
|
-
success: false
|
|
7473
|
-
};
|
|
7474
|
-
}
|
|
7475
|
-
},
|
|
7476
|
-
(result) => {
|
|
7510
|
+
try {
|
|
7511
|
+
const job = await this.pipeline.getJob(input.jobId);
|
|
7512
|
+
if (!job) {
|
|
7513
|
+
logger.warn(`❓ [CancelJobTool] Job not found: ${input.jobId}`);
|
|
7477
7514
|
return {
|
|
7478
|
-
|
|
7479
|
-
|
|
7515
|
+
message: `Job with ID ${input.jobId} not found.`,
|
|
7516
|
+
success: false
|
|
7480
7517
|
};
|
|
7481
7518
|
}
|
|
7482
|
-
|
|
7519
|
+
if (job.status === PipelineJobStatus.COMPLETED || // Use enum member
|
|
7520
|
+
job.status === PipelineJobStatus.FAILED || // Use enum member
|
|
7521
|
+
job.status === PipelineJobStatus.CANCELLED) {
|
|
7522
|
+
logger.debug(`Job ${input.jobId} is already in a final state: ${job.status}.`);
|
|
7523
|
+
return {
|
|
7524
|
+
message: `Job ${input.jobId} is already ${job.status}. No action taken.`,
|
|
7525
|
+
success: true
|
|
7526
|
+
// Considered success as no cancellation needed
|
|
7527
|
+
};
|
|
7528
|
+
}
|
|
7529
|
+
await this.pipeline.cancelJob(input.jobId);
|
|
7530
|
+
const updatedJob = await this.pipeline.getJob(input.jobId);
|
|
7531
|
+
const finalStatus = updatedJob?.status ?? "UNKNOWN (job disappeared?)";
|
|
7532
|
+
logger.debug(
|
|
7533
|
+
`Cancellation requested for job ${input.jobId}. Current status: ${finalStatus}`
|
|
7534
|
+
);
|
|
7535
|
+
return {
|
|
7536
|
+
message: `Cancellation requested for job ${input.jobId}. Current status: ${finalStatus}.`,
|
|
7537
|
+
success: true
|
|
7538
|
+
};
|
|
7539
|
+
} catch (error) {
|
|
7540
|
+
logger.error(`❌ Error cancelling job ${input.jobId}: ${error}`);
|
|
7541
|
+
return {
|
|
7542
|
+
message: `Failed to cancel job ${input.jobId}: ${error instanceof Error ? error.message : String(error)}`,
|
|
7543
|
+
success: false
|
|
7544
|
+
};
|
|
7545
|
+
}
|
|
7483
7546
|
}
|
|
7484
7547
|
}
|
|
7485
7548
|
class ClearCompletedJobsTool {
|
|
@@ -7497,33 +7560,24 @@ class ClearCompletedJobsTool {
|
|
|
7497
7560
|
* @returns A promise that resolves with the outcome of the clear operation.
|
|
7498
7561
|
*/
|
|
7499
7562
|
async execute(_input) {
|
|
7500
|
-
|
|
7501
|
-
|
|
7502
|
-
|
|
7503
|
-
|
|
7504
|
-
|
|
7505
|
-
|
|
7506
|
-
|
|
7507
|
-
|
|
7508
|
-
|
|
7509
|
-
|
|
7510
|
-
|
|
7511
|
-
|
|
7512
|
-
|
|
7513
|
-
|
|
7514
|
-
|
|
7515
|
-
|
|
7516
|
-
|
|
7517
|
-
|
|
7518
|
-
clearedCount: 0
|
|
7519
|
-
};
|
|
7520
|
-
}
|
|
7521
|
-
},
|
|
7522
|
-
(result) => ({
|
|
7523
|
-
success: result.success,
|
|
7524
|
-
clearedCount: result.clearedCount
|
|
7525
|
-
})
|
|
7526
|
-
);
|
|
7563
|
+
try {
|
|
7564
|
+
const clearedCount = await this.pipeline.clearCompletedJobs();
|
|
7565
|
+
const message = clearedCount > 0 ? `Successfully cleared ${clearedCount} completed job${clearedCount === 1 ? "" : "s"} from the queue.` : "No completed jobs to clear.";
|
|
7566
|
+
logger.debug(message);
|
|
7567
|
+
return {
|
|
7568
|
+
message,
|
|
7569
|
+
success: true,
|
|
7570
|
+
clearedCount
|
|
7571
|
+
};
|
|
7572
|
+
} catch (error) {
|
|
7573
|
+
const errorMessage = `Failed to clear completed jobs: ${error instanceof Error ? error.message : String(error)}`;
|
|
7574
|
+
logger.error(`❌ ${errorMessage}`);
|
|
7575
|
+
return {
|
|
7576
|
+
message: errorMessage,
|
|
7577
|
+
success: false,
|
|
7578
|
+
clearedCount: 0
|
|
7579
|
+
};
|
|
7580
|
+
}
|
|
7527
7581
|
}
|
|
7528
7582
|
}
|
|
7529
7583
|
class ToolError extends Error {
|
|
@@ -7583,103 +7637,88 @@ class FetchUrlTool {
|
|
|
7583
7637
|
* @throws {ToolError} If fetching or processing fails
|
|
7584
7638
|
*/
|
|
7585
7639
|
async execute(options) {
|
|
7586
|
-
|
|
7587
|
-
|
|
7588
|
-
|
|
7589
|
-
|
|
7590
|
-
|
|
7591
|
-
|
|
7592
|
-
|
|
7593
|
-
|
|
7594
|
-
|
|
7595
|
-
|
|
7596
|
-
|
|
7597
|
-
|
|
7598
|
-
|
|
7599
|
-
|
|
7600
|
-
|
|
7601
|
-
|
|
7602
|
-
|
|
7603
|
-
|
|
7604
|
-
|
|
7605
|
-
|
|
7606
|
-
|
|
7607
|
-
|
|
7608
|
-
|
|
7609
|
-
|
|
7610
|
-
|
|
7611
|
-
|
|
7612
|
-
|
|
7613
|
-
|
|
7614
|
-
|
|
7615
|
-
|
|
7616
|
-
|
|
7617
|
-
|
|
7618
|
-
|
|
7619
|
-
|
|
7620
|
-
|
|
7621
|
-
|
|
7622
|
-
|
|
7623
|
-
|
|
7624
|
-
|
|
7625
|
-
|
|
7626
|
-
|
|
7627
|
-
// propagate custom headers
|
|
7628
|
-
},
|
|
7629
|
-
fetcher
|
|
7630
|
-
);
|
|
7631
|
-
break;
|
|
7632
|
-
}
|
|
7633
|
-
}
|
|
7634
|
-
if (!processed) {
|
|
7635
|
-
logger.warn(
|
|
7636
|
-
`⚠️ Unsupported content type "${rawContent.mimeType}" for ${url}. Returning raw content.`
|
|
7637
|
-
);
|
|
7638
|
-
const resolvedCharset = resolveCharset(
|
|
7639
|
-
rawContent.charset,
|
|
7640
|
-
rawContent.content,
|
|
7641
|
-
rawContent.mimeType
|
|
7642
|
-
);
|
|
7643
|
-
const contentString = convertToString(rawContent.content, resolvedCharset);
|
|
7644
|
-
return contentString;
|
|
7645
|
-
}
|
|
7646
|
-
for (const err of processed.errors) {
|
|
7647
|
-
logger.warn(`⚠️ Processing error for ${url}: ${err.message}`);
|
|
7648
|
-
}
|
|
7649
|
-
if (typeof processed.textContent !== "string" || !processed.textContent.trim()) {
|
|
7650
|
-
throw new ToolError(
|
|
7651
|
-
`Processing resulted in empty content for ${url}`,
|
|
7652
|
-
this.constructor.name
|
|
7653
|
-
);
|
|
7654
|
-
}
|
|
7655
|
-
logger.info(`✅ Successfully processed ${url}`);
|
|
7656
|
-
return processed.textContent;
|
|
7657
|
-
} catch (error) {
|
|
7658
|
-
if (error instanceof ScraperError || error instanceof ToolError) {
|
|
7659
|
-
throw new ToolError(
|
|
7660
|
-
`Failed to fetch or process URL: ${error.message}`,
|
|
7661
|
-
this.constructor.name
|
|
7662
|
-
);
|
|
7663
|
-
}
|
|
7664
|
-
throw new ToolError(
|
|
7665
|
-
`Failed to fetch or process URL: ${error instanceof Error ? error.message : String(error)}`,
|
|
7666
|
-
this.constructor.name
|
|
7640
|
+
const { url, scrapeMode = ScrapeMode.Auto, headers } = options;
|
|
7641
|
+
const canFetchResults = this.fetchers.map((f) => f.canFetch(url));
|
|
7642
|
+
const fetcherIndex = canFetchResults.indexOf(true);
|
|
7643
|
+
if (fetcherIndex === -1) {
|
|
7644
|
+
throw new ToolError(
|
|
7645
|
+
`Invalid URL: ${url}. Must be an HTTP/HTTPS URL or a file:// URL.`,
|
|
7646
|
+
this.constructor.name
|
|
7647
|
+
);
|
|
7648
|
+
}
|
|
7649
|
+
const fetcher = this.fetchers[fetcherIndex];
|
|
7650
|
+
logger.debug(`Using fetcher "${fetcher.constructor.name}" for URL: ${url}`);
|
|
7651
|
+
try {
|
|
7652
|
+
logger.info(`📡 Fetching ${url}...`);
|
|
7653
|
+
const rawContent = await fetcher.fetch(url, {
|
|
7654
|
+
followRedirects: options.followRedirects ?? true,
|
|
7655
|
+
maxRetries: 3,
|
|
7656
|
+
headers
|
|
7657
|
+
// propagate custom headers
|
|
7658
|
+
});
|
|
7659
|
+
logger.info("🔄 Processing content...");
|
|
7660
|
+
let processed;
|
|
7661
|
+
for (const pipeline of this.pipelines) {
|
|
7662
|
+
if (pipeline.canProcess(rawContent)) {
|
|
7663
|
+
processed = await pipeline.process(
|
|
7664
|
+
rawContent,
|
|
7665
|
+
{
|
|
7666
|
+
url,
|
|
7667
|
+
library: "",
|
|
7668
|
+
version: "",
|
|
7669
|
+
maxDepth: 0,
|
|
7670
|
+
maxPages: 1,
|
|
7671
|
+
maxConcurrency: 1,
|
|
7672
|
+
scope: "subpages",
|
|
7673
|
+
followRedirects: options.followRedirects ?? true,
|
|
7674
|
+
excludeSelectors: void 0,
|
|
7675
|
+
ignoreErrors: false,
|
|
7676
|
+
scrapeMode,
|
|
7677
|
+
headers
|
|
7678
|
+
// propagate custom headers
|
|
7679
|
+
},
|
|
7680
|
+
fetcher
|
|
7667
7681
|
);
|
|
7668
|
-
|
|
7669
|
-
await Promise.allSettled(this.pipelines.map((pipeline) => pipeline.close()));
|
|
7682
|
+
break;
|
|
7670
7683
|
}
|
|
7671
|
-
},
|
|
7672
|
-
(result) => {
|
|
7673
|
-
const { url, scrapeMode, followRedirects, headers } = options;
|
|
7674
|
-
return {
|
|
7675
|
-
url,
|
|
7676
|
-
scrapeMode,
|
|
7677
|
-
followRedirects,
|
|
7678
|
-
contentLength: result.length,
|
|
7679
|
-
hasHeaders: !!headers
|
|
7680
|
-
};
|
|
7681
7684
|
}
|
|
7682
|
-
|
|
7685
|
+
if (!processed) {
|
|
7686
|
+
logger.warn(
|
|
7687
|
+
`⚠️ Unsupported content type "${rawContent.mimeType}" for ${url}. Returning raw content.`
|
|
7688
|
+
);
|
|
7689
|
+
const resolvedCharset = resolveCharset(
|
|
7690
|
+
rawContent.charset,
|
|
7691
|
+
rawContent.content,
|
|
7692
|
+
rawContent.mimeType
|
|
7693
|
+
);
|
|
7694
|
+
const contentString = convertToString(rawContent.content, resolvedCharset);
|
|
7695
|
+
return contentString;
|
|
7696
|
+
}
|
|
7697
|
+
for (const err of processed.errors) {
|
|
7698
|
+
logger.warn(`⚠️ Processing error for ${url}: ${err.message}`);
|
|
7699
|
+
}
|
|
7700
|
+
if (typeof processed.textContent !== "string" || !processed.textContent.trim()) {
|
|
7701
|
+
throw new ToolError(
|
|
7702
|
+
`Processing resulted in empty content for ${url}`,
|
|
7703
|
+
this.constructor.name
|
|
7704
|
+
);
|
|
7705
|
+
}
|
|
7706
|
+
logger.info(`✅ Successfully processed ${url}`);
|
|
7707
|
+
return processed.textContent;
|
|
7708
|
+
} catch (error) {
|
|
7709
|
+
if (error instanceof ScraperError || error instanceof ToolError) {
|
|
7710
|
+
throw new ToolError(
|
|
7711
|
+
`Failed to fetch or process URL: ${error.message}`,
|
|
7712
|
+
this.constructor.name
|
|
7713
|
+
);
|
|
7714
|
+
}
|
|
7715
|
+
throw new ToolError(
|
|
7716
|
+
`Failed to fetch or process URL: ${error instanceof Error ? error.message : String(error)}`,
|
|
7717
|
+
this.constructor.name
|
|
7718
|
+
);
|
|
7719
|
+
} finally {
|
|
7720
|
+
await Promise.allSettled(this.pipelines.map((pipeline) => pipeline.close()));
|
|
7721
|
+
}
|
|
7683
7722
|
}
|
|
7684
7723
|
}
|
|
7685
7724
|
class FindVersionTool {
|
|
@@ -7692,50 +7731,36 @@ class FindVersionTool {
|
|
|
7692
7731
|
* @returns A descriptive string indicating the best match and unversioned status, or an error message.
|
|
7693
7732
|
*/
|
|
7694
7733
|
async execute(options) {
|
|
7695
|
-
|
|
7696
|
-
|
|
7697
|
-
|
|
7698
|
-
|
|
7699
|
-
|
|
7700
|
-
|
|
7701
|
-
|
|
7702
|
-
|
|
7703
|
-
|
|
7704
|
-
|
|
7705
|
-
|
|
7706
|
-
|
|
7707
|
-
|
|
7708
|
-
|
|
7709
|
-
|
|
7710
|
-
|
|
7711
|
-
|
|
7712
|
-
message = `No matching version found for ${libraryAndVersion}, but unversioned docs exist.`;
|
|
7713
|
-
} else {
|
|
7714
|
-
message = `No matching version or unversioned documents found for ${libraryAndVersion}.`;
|
|
7715
|
-
}
|
|
7716
|
-
return { message, bestMatch, hasUnversioned };
|
|
7717
|
-
} catch (error) {
|
|
7718
|
-
if (error instanceof VersionNotFoundError) {
|
|
7719
|
-
logger.info(`ℹ️ Version not found: ${error.message}`);
|
|
7720
|
-
const message = `No matching version or unversioned documents found for ${libraryAndVersion}. Available: ${error.availableVersions.length > 0 ? error.availableVersions.map((v) => v.version).join(", ") : "None"}.`;
|
|
7721
|
-
return { message, bestMatch: null, hasUnversioned: false };
|
|
7722
|
-
}
|
|
7723
|
-
logger.error(
|
|
7724
|
-
`❌ Error finding version for ${libraryAndVersion}: ${error instanceof Error ? error.message : error}`
|
|
7725
|
-
);
|
|
7726
|
-
throw error;
|
|
7727
|
-
}
|
|
7728
|
-
},
|
|
7729
|
-
(result) => {
|
|
7730
|
-
const { library, targetVersion } = options;
|
|
7731
|
-
return {
|
|
7732
|
-
library,
|
|
7733
|
-
targetVersion,
|
|
7734
|
-
foundMatch: !!result.bestMatch,
|
|
7735
|
-
hasUnversioned: result.hasUnversioned
|
|
7736
|
-
};
|
|
7734
|
+
const { library, targetVersion } = options;
|
|
7735
|
+
const libraryAndVersion = `${library}${targetVersion ? `@${targetVersion}` : ""}`;
|
|
7736
|
+
try {
|
|
7737
|
+
const { bestMatch, hasUnversioned } = await this.docService.findBestVersion(
|
|
7738
|
+
library,
|
|
7739
|
+
targetVersion
|
|
7740
|
+
);
|
|
7741
|
+
let message = "";
|
|
7742
|
+
if (bestMatch) {
|
|
7743
|
+
message = `Best match: ${bestMatch}.`;
|
|
7744
|
+
if (hasUnversioned) {
|
|
7745
|
+
message += " Unversioned docs also available.";
|
|
7746
|
+
}
|
|
7747
|
+
} else if (hasUnversioned) {
|
|
7748
|
+
message = `No matching version found for ${libraryAndVersion}, but unversioned docs exist.`;
|
|
7749
|
+
} else {
|
|
7750
|
+
message = `No matching version or unversioned documents found for ${libraryAndVersion}.`;
|
|
7737
7751
|
}
|
|
7738
|
-
|
|
7752
|
+
return message;
|
|
7753
|
+
} catch (error) {
|
|
7754
|
+
if (error instanceof VersionNotFoundError) {
|
|
7755
|
+
logger.info(`ℹ️ Version not found: ${error.message}`);
|
|
7756
|
+
const message = `No matching version or unversioned documents found for ${libraryAndVersion}. Available: ${error.availableVersions.length > 0 ? error.availableVersions.map((v) => v.version).join(", ") : "None"}.`;
|
|
7757
|
+
return message;
|
|
7758
|
+
}
|
|
7759
|
+
logger.error(
|
|
7760
|
+
`❌ Error finding version for ${libraryAndVersion}: ${error instanceof Error ? error.message : error}`
|
|
7761
|
+
);
|
|
7762
|
+
throw error;
|
|
7763
|
+
}
|
|
7739
7764
|
}
|
|
7740
7765
|
}
|
|
7741
7766
|
class GetJobInfoTool {
|
|
@@ -7753,41 +7778,29 @@ class GetJobInfoTool {
|
|
|
7753
7778
|
* @returns A promise that resolves with the simplified job info or null if not found.
|
|
7754
7779
|
*/
|
|
7755
7780
|
async execute(input) {
|
|
7756
|
-
|
|
7757
|
-
|
|
7758
|
-
|
|
7759
|
-
|
|
7760
|
-
|
|
7761
|
-
|
|
7762
|
-
|
|
7763
|
-
|
|
7764
|
-
|
|
7765
|
-
|
|
7766
|
-
|
|
7767
|
-
|
|
7768
|
-
|
|
7769
|
-
|
|
7770
|
-
|
|
7771
|
-
|
|
7772
|
-
|
|
7773
|
-
|
|
7774
|
-
|
|
7775
|
-
|
|
7776
|
-
|
|
7777
|
-
|
|
7778
|
-
|
|
7779
|
-
errorMessage: job.errorMessage ?? void 0
|
|
7780
|
-
};
|
|
7781
|
-
return { job: jobInfo };
|
|
7782
|
-
},
|
|
7783
|
-
(result) => {
|
|
7784
|
-
return {
|
|
7785
|
-
found: result.job !== null,
|
|
7786
|
-
library: result.job?.library,
|
|
7787
|
-
version: result.job?.version
|
|
7788
|
-
};
|
|
7789
|
-
}
|
|
7790
|
-
);
|
|
7781
|
+
const job = await this.pipeline.getJob(input.jobId);
|
|
7782
|
+
if (!job) {
|
|
7783
|
+
return { job: null };
|
|
7784
|
+
}
|
|
7785
|
+
const jobInfo = {
|
|
7786
|
+
id: job.id,
|
|
7787
|
+
library: job.library,
|
|
7788
|
+
version: job.version,
|
|
7789
|
+
status: job.status,
|
|
7790
|
+
dbStatus: job.versionStatus,
|
|
7791
|
+
createdAt: job.createdAt.toISOString(),
|
|
7792
|
+
startedAt: job.startedAt?.toISOString() ?? null,
|
|
7793
|
+
finishedAt: job.finishedAt?.toISOString() ?? null,
|
|
7794
|
+
error: job.error?.message ?? null,
|
|
7795
|
+
progress: job.progressMaxPages && job.progressMaxPages > 0 ? {
|
|
7796
|
+
pages: job.progressPages || 0,
|
|
7797
|
+
totalPages: job.progressMaxPages,
|
|
7798
|
+
totalDiscovered: job.progress?.totalDiscovered || job.progressMaxPages
|
|
7799
|
+
} : void 0,
|
|
7800
|
+
updatedAt: job.updatedAt?.toISOString(),
|
|
7801
|
+
errorMessage: job.errorMessage ?? void 0
|
|
7802
|
+
};
|
|
7803
|
+
return { job: jobInfo };
|
|
7791
7804
|
}
|
|
7792
7805
|
}
|
|
7793
7806
|
class ListJobsTool {
|
|
@@ -7805,45 +7818,28 @@ class ListJobsTool {
|
|
|
7805
7818
|
* @returns A promise that resolves with the list of simplified job objects.
|
|
7806
7819
|
*/
|
|
7807
7820
|
async execute(input) {
|
|
7808
|
-
|
|
7809
|
-
|
|
7810
|
-
|
|
7811
|
-
|
|
7812
|
-
|
|
7813
|
-
|
|
7814
|
-
|
|
7815
|
-
|
|
7816
|
-
|
|
7817
|
-
|
|
7818
|
-
|
|
7819
|
-
|
|
7820
|
-
|
|
7821
|
-
|
|
7822
|
-
|
|
7823
|
-
|
|
7824
|
-
|
|
7825
|
-
|
|
7826
|
-
|
|
7827
|
-
|
|
7828
|
-
|
|
7829
|
-
|
|
7830
|
-
};
|
|
7831
|
-
});
|
|
7832
|
-
return { jobs: simplifiedJobs };
|
|
7833
|
-
},
|
|
7834
|
-
(result) => {
|
|
7835
|
-
return {
|
|
7836
|
-
jobCount: result.jobs.length,
|
|
7837
|
-
statusCounts: result.jobs.reduce(
|
|
7838
|
-
(acc, job) => {
|
|
7839
|
-
acc[job.status] = (acc[job.status] || 0) + 1;
|
|
7840
|
-
return acc;
|
|
7841
|
-
},
|
|
7842
|
-
{}
|
|
7843
|
-
)
|
|
7844
|
-
};
|
|
7845
|
-
}
|
|
7846
|
-
);
|
|
7821
|
+
const jobs = await this.pipeline.getJobs(input.status);
|
|
7822
|
+
const simplifiedJobs = jobs.map((job) => {
|
|
7823
|
+
return {
|
|
7824
|
+
id: job.id,
|
|
7825
|
+
library: job.library,
|
|
7826
|
+
version: job.version,
|
|
7827
|
+
status: job.status,
|
|
7828
|
+
dbStatus: job.versionStatus,
|
|
7829
|
+
createdAt: job.createdAt.toISOString(),
|
|
7830
|
+
startedAt: job.startedAt?.toISOString() ?? null,
|
|
7831
|
+
finishedAt: job.finishedAt?.toISOString() ?? null,
|
|
7832
|
+
error: job.error?.message ?? null,
|
|
7833
|
+
progress: job.progressMaxPages && job.progressMaxPages > 0 ? {
|
|
7834
|
+
pages: job.progressPages || 0,
|
|
7835
|
+
totalPages: job.progressMaxPages,
|
|
7836
|
+
totalDiscovered: job.progress?.totalDiscovered || job.progressMaxPages
|
|
7837
|
+
} : void 0,
|
|
7838
|
+
updatedAt: job.updatedAt?.toISOString(),
|
|
7839
|
+
errorMessage: job.errorMessage ?? void 0
|
|
7840
|
+
};
|
|
7841
|
+
});
|
|
7842
|
+
return { jobs: simplifiedJobs };
|
|
7847
7843
|
}
|
|
7848
7844
|
}
|
|
7849
7845
|
class ListLibrariesTool {
|
|
@@ -7852,32 +7848,20 @@ class ListLibrariesTool {
|
|
|
7852
7848
|
this.docService = docService;
|
|
7853
7849
|
}
|
|
7854
7850
|
async execute(_options) {
|
|
7855
|
-
|
|
7856
|
-
|
|
7857
|
-
|
|
7858
|
-
|
|
7859
|
-
|
|
7860
|
-
|
|
7861
|
-
|
|
7862
|
-
|
|
7863
|
-
|
|
7864
|
-
|
|
7865
|
-
|
|
7866
|
-
|
|
7867
|
-
|
|
7868
|
-
|
|
7869
|
-
}))
|
|
7870
|
-
}));
|
|
7871
|
-
return { libraries };
|
|
7872
|
-
},
|
|
7873
|
-
(result) => ({
|
|
7874
|
-
libraryCount: result.libraries.length,
|
|
7875
|
-
totalVersions: result.libraries.reduce(
|
|
7876
|
-
(sum, lib) => sum + lib.versions.length,
|
|
7877
|
-
0
|
|
7878
|
-
)
|
|
7879
|
-
})
|
|
7880
|
-
);
|
|
7851
|
+
const rawLibraries = await this.docService.listLibraries();
|
|
7852
|
+
const libraries = rawLibraries.map(({ library, versions }) => ({
|
|
7853
|
+
name: library,
|
|
7854
|
+
versions: versions.map((v) => ({
|
|
7855
|
+
version: v.ref.version,
|
|
7856
|
+
documentCount: v.counts.documents,
|
|
7857
|
+
uniqueUrlCount: v.counts.uniqueUrls,
|
|
7858
|
+
indexedAt: v.indexedAt,
|
|
7859
|
+
status: v.status,
|
|
7860
|
+
...v.progress ? { progress: v.progress } : void 0,
|
|
7861
|
+
sourceUrl: v.sourceUrl
|
|
7862
|
+
}))
|
|
7863
|
+
}));
|
|
7864
|
+
return { libraries };
|
|
7881
7865
|
}
|
|
7882
7866
|
}
|
|
7883
7867
|
class RemoveTool {
|
|
@@ -7891,42 +7875,29 @@ class RemoveTool {
|
|
|
7891
7875
|
* Removes all documents, the version record, and the library if no other versions exist.
|
|
7892
7876
|
*/
|
|
7893
7877
|
async execute(args) {
|
|
7894
|
-
|
|
7895
|
-
|
|
7896
|
-
|
|
7897
|
-
|
|
7898
|
-
|
|
7899
|
-
|
|
7900
|
-
|
|
7901
|
-
|
|
7902
|
-
|
|
7903
|
-
|
|
7904
|
-
|
|
7905
|
-
|
|
7906
|
-
|
|
7907
|
-
);
|
|
7908
|
-
await this.pipeline.cancelJob(job.id);
|
|
7909
|
-
await this.pipeline.waitForJobCompletion(job.id);
|
|
7910
|
-
}
|
|
7911
|
-
await this.documentManagementService.removeVersion(library, version2);
|
|
7912
|
-
const message = `Successfully removed ${library}${version2 ? `@${version2}` : ""}.`;
|
|
7913
|
-
logger.info(`✅ ${message}`);
|
|
7914
|
-
return { message };
|
|
7915
|
-
} catch (error) {
|
|
7916
|
-
const errorMessage = `Failed to remove ${library}${version2 ? `@${version2}` : ""}: ${error instanceof Error ? error.message : String(error)}`;
|
|
7917
|
-
logger.error(`❌ Error removing library: ${errorMessage}`);
|
|
7918
|
-
throw new ToolError(errorMessage, this.constructor.name);
|
|
7919
|
-
}
|
|
7920
|
-
},
|
|
7921
|
-
() => {
|
|
7922
|
-
const { library, version: version2 } = args;
|
|
7923
|
-
return {
|
|
7924
|
-
library,
|
|
7925
|
-
version: version2
|
|
7926
|
-
// Success is implicit since if this callback runs, no exception was thrown
|
|
7927
|
-
};
|
|
7878
|
+
const { library, version: version2 } = args;
|
|
7879
|
+
logger.info(`🗑️ Removing library: ${library}${version2 ? `@${version2}` : ""}`);
|
|
7880
|
+
try {
|
|
7881
|
+
const allJobs = await this.pipeline.getJobs();
|
|
7882
|
+
const jobs = allJobs.filter(
|
|
7883
|
+
(job) => job.library === library && job.version === (version2 ?? "") && (job.status === PipelineJobStatus.QUEUED || job.status === PipelineJobStatus.RUNNING)
|
|
7884
|
+
);
|
|
7885
|
+
for (const job of jobs) {
|
|
7886
|
+
logger.info(
|
|
7887
|
+
`🚫 Aborting job for ${library}@${version2 ?? ""} before deletion: ${job.id}`
|
|
7888
|
+
);
|
|
7889
|
+
await this.pipeline.cancelJob(job.id);
|
|
7890
|
+
await this.pipeline.waitForJobCompletion(job.id);
|
|
7928
7891
|
}
|
|
7929
|
-
|
|
7892
|
+
await this.documentManagementService.removeVersion(library, version2);
|
|
7893
|
+
const message = `Successfully removed ${library}${version2 ? `@${version2}` : ""}.`;
|
|
7894
|
+
logger.info(`✅ ${message}`);
|
|
7895
|
+
return { message };
|
|
7896
|
+
} catch (error) {
|
|
7897
|
+
const errorMessage = `Failed to remove ${library}${version2 ? `@${version2}` : ""}: ${error instanceof Error ? error.message : String(error)}`;
|
|
7898
|
+
logger.error(`❌ Error removing library: ${errorMessage}`);
|
|
7899
|
+
throw new ToolError(errorMessage, this.constructor.name);
|
|
7900
|
+
}
|
|
7930
7901
|
}
|
|
7931
7902
|
}
|
|
7932
7903
|
class ScrapeTool {
|
|
@@ -7942,80 +7913,66 @@ class ScrapeTool {
|
|
|
7942
7913
|
options: scraperOptions,
|
|
7943
7914
|
waitForCompletion = true
|
|
7944
7915
|
} = options;
|
|
7945
|
-
|
|
7946
|
-
|
|
7947
|
-
|
|
7948
|
-
|
|
7949
|
-
|
|
7950
|
-
|
|
7951
|
-
|
|
7916
|
+
let internalVersion;
|
|
7917
|
+
const partialVersionRegex = /^\d+(\.\d+)?$/;
|
|
7918
|
+
if (version2 === null || version2 === void 0) {
|
|
7919
|
+
internalVersion = "";
|
|
7920
|
+
} else {
|
|
7921
|
+
const validFullVersion = semver.valid(version2);
|
|
7922
|
+
if (validFullVersion) {
|
|
7923
|
+
internalVersion = validFullVersion;
|
|
7924
|
+
} else if (partialVersionRegex.test(version2)) {
|
|
7925
|
+
const coercedVersion = semver.coerce(version2);
|
|
7926
|
+
if (coercedVersion) {
|
|
7927
|
+
internalVersion = coercedVersion.version;
|
|
7952
7928
|
} else {
|
|
7953
|
-
|
|
7954
|
-
|
|
7955
|
-
|
|
7956
|
-
} else if (partialVersionRegex.test(version2)) {
|
|
7957
|
-
const coercedVersion = semver.coerce(version2);
|
|
7958
|
-
if (coercedVersion) {
|
|
7959
|
-
internalVersion = coercedVersion.version;
|
|
7960
|
-
} else {
|
|
7961
|
-
throw new Error(
|
|
7962
|
-
`Invalid version format for scraping: '${version2}'. Use 'X.Y.Z', 'X.Y.Z-prerelease', 'X.Y', 'X', or omit.`
|
|
7963
|
-
);
|
|
7964
|
-
}
|
|
7965
|
-
} else {
|
|
7966
|
-
throw new Error(
|
|
7967
|
-
`Invalid version format for scraping: '${version2}'. Use 'X.Y.Z', 'X.Y.Z-prerelease', 'X.Y', 'X', or omit.`
|
|
7968
|
-
);
|
|
7969
|
-
}
|
|
7970
|
-
}
|
|
7971
|
-
internalVersion = internalVersion.toLowerCase();
|
|
7972
|
-
const pipeline = this.pipeline;
|
|
7973
|
-
const enqueueVersion = internalVersion === "" ? null : internalVersion;
|
|
7974
|
-
const jobId = await pipeline.enqueueJob(library, enqueueVersion, {
|
|
7975
|
-
url,
|
|
7976
|
-
library,
|
|
7977
|
-
version: internalVersion,
|
|
7978
|
-
scope: scraperOptions?.scope ?? "subpages",
|
|
7979
|
-
followRedirects: scraperOptions?.followRedirects ?? true,
|
|
7980
|
-
maxPages: scraperOptions?.maxPages ?? DEFAULT_MAX_PAGES,
|
|
7981
|
-
maxDepth: scraperOptions?.maxDepth ?? DEFAULT_MAX_DEPTH$1,
|
|
7982
|
-
maxConcurrency: scraperOptions?.maxConcurrency ?? DEFAULT_MAX_CONCURRENCY,
|
|
7983
|
-
ignoreErrors: scraperOptions?.ignoreErrors ?? true,
|
|
7984
|
-
scrapeMode: scraperOptions?.scrapeMode ?? ScrapeMode.Auto,
|
|
7985
|
-
// Pass scrapeMode enum
|
|
7986
|
-
includePatterns: scraperOptions?.includePatterns,
|
|
7987
|
-
excludePatterns: scraperOptions?.excludePatterns,
|
|
7988
|
-
headers: scraperOptions?.headers
|
|
7989
|
-
// <-- propagate headers
|
|
7990
|
-
});
|
|
7991
|
-
if (waitForCompletion) {
|
|
7992
|
-
try {
|
|
7993
|
-
await pipeline.waitForJobCompletion(jobId);
|
|
7994
|
-
const finalJob = await pipeline.getJob(jobId);
|
|
7995
|
-
const finalPagesScraped = finalJob?.progress?.pagesScraped ?? 0;
|
|
7996
|
-
logger.debug(
|
|
7997
|
-
`Job ${jobId} finished with status ${finalJob?.status}. Pages scraped: ${finalPagesScraped}`
|
|
7998
|
-
);
|
|
7999
|
-
return {
|
|
8000
|
-
pagesScraped: finalPagesScraped
|
|
8001
|
-
};
|
|
8002
|
-
} catch (error) {
|
|
8003
|
-
logger.error(`❌ Job ${jobId} failed or was cancelled: ${error}`);
|
|
8004
|
-
throw error;
|
|
8005
|
-
}
|
|
7929
|
+
throw new Error(
|
|
7930
|
+
`Invalid version format for scraping: '${version2}'. Use 'X.Y.Z', 'X.Y.Z-prerelease', 'X.Y', 'X', or omit.`
|
|
7931
|
+
);
|
|
8006
7932
|
}
|
|
8007
|
-
|
|
8008
|
-
|
|
8009
|
-
|
|
8010
|
-
|
|
8011
|
-
|
|
8012
|
-
|
|
8013
|
-
|
|
8014
|
-
|
|
8015
|
-
|
|
8016
|
-
|
|
8017
|
-
|
|
8018
|
-
|
|
7933
|
+
} else {
|
|
7934
|
+
throw new Error(
|
|
7935
|
+
`Invalid version format for scraping: '${version2}'. Use 'X.Y.Z', 'X.Y.Z-prerelease', 'X.Y', 'X', or omit.`
|
|
7936
|
+
);
|
|
7937
|
+
}
|
|
7938
|
+
}
|
|
7939
|
+
internalVersion = internalVersion.toLowerCase();
|
|
7940
|
+
const pipeline = this.pipeline;
|
|
7941
|
+
const enqueueVersion = internalVersion === "" ? null : internalVersion;
|
|
7942
|
+
const jobId = await pipeline.enqueueJob(library, enqueueVersion, {
|
|
7943
|
+
url,
|
|
7944
|
+
library,
|
|
7945
|
+
version: internalVersion,
|
|
7946
|
+
scope: scraperOptions?.scope ?? "subpages",
|
|
7947
|
+
followRedirects: scraperOptions?.followRedirects ?? true,
|
|
7948
|
+
maxPages: scraperOptions?.maxPages ?? DEFAULT_MAX_PAGES,
|
|
7949
|
+
maxDepth: scraperOptions?.maxDepth ?? DEFAULT_MAX_DEPTH$1,
|
|
7950
|
+
maxConcurrency: scraperOptions?.maxConcurrency ?? DEFAULT_MAX_CONCURRENCY,
|
|
7951
|
+
ignoreErrors: scraperOptions?.ignoreErrors ?? true,
|
|
7952
|
+
scrapeMode: scraperOptions?.scrapeMode ?? ScrapeMode.Auto,
|
|
7953
|
+
// Pass scrapeMode enum
|
|
7954
|
+
includePatterns: scraperOptions?.includePatterns,
|
|
7955
|
+
excludePatterns: scraperOptions?.excludePatterns,
|
|
7956
|
+
headers: scraperOptions?.headers
|
|
7957
|
+
// <-- propagate headers
|
|
7958
|
+
});
|
|
7959
|
+
if (waitForCompletion) {
|
|
7960
|
+
try {
|
|
7961
|
+
await pipeline.waitForJobCompletion(jobId);
|
|
7962
|
+
const finalJob = await pipeline.getJob(jobId);
|
|
7963
|
+
const finalPagesScraped = finalJob?.progress?.pagesScraped ?? 0;
|
|
7964
|
+
logger.debug(
|
|
7965
|
+
`Job ${jobId} finished with status ${finalJob?.status}. Pages scraped: ${finalPagesScraped}`
|
|
7966
|
+
);
|
|
7967
|
+
return {
|
|
7968
|
+
pagesScraped: finalPagesScraped
|
|
7969
|
+
};
|
|
7970
|
+
} catch (error) {
|
|
7971
|
+
logger.error(`❌ Job ${jobId} failed or was cancelled: ${error}`);
|
|
7972
|
+
throw error;
|
|
7973
|
+
}
|
|
7974
|
+
}
|
|
7975
|
+
return { jobId };
|
|
8019
7976
|
}
|
|
8020
7977
|
}
|
|
8021
7978
|
class SearchTool {
|
|
@@ -8025,56 +7982,43 @@ class SearchTool {
|
|
|
8025
7982
|
}
|
|
8026
7983
|
async execute(options) {
|
|
8027
7984
|
const { library, version: version2, query, limit = 5, exactMatch = false } = options;
|
|
8028
|
-
|
|
8029
|
-
|
|
8030
|
-
|
|
8031
|
-
|
|
8032
|
-
|
|
8033
|
-
|
|
8034
|
-
|
|
8035
|
-
|
|
8036
|
-
|
|
8037
|
-
|
|
8038
|
-
|
|
8039
|
-
|
|
8040
|
-
|
|
8041
|
-
|
|
8042
|
-
|
|
8043
|
-
|
|
8044
|
-
|
|
8045
|
-
|
|
8046
|
-
|
|
8047
|
-
|
|
8048
|
-
|
|
8049
|
-
|
|
8050
|
-
|
|
8051
|
-
|
|
8052
|
-
versionToSearch = versionResult.bestMatch;
|
|
8053
|
-
}
|
|
8054
|
-
const results = await this.docService.searchStore(
|
|
8055
|
-
library,
|
|
8056
|
-
versionToSearch,
|
|
8057
|
-
query,
|
|
8058
|
-
limit
|
|
8059
|
-
);
|
|
8060
|
-
logger.info(`✅ Found ${results.length} matching results`);
|
|
8061
|
-
return { results };
|
|
8062
|
-
} catch (error) {
|
|
8063
|
-
logger.error(
|
|
8064
|
-
`❌ Search failed: ${error instanceof Error ? error.message : "Unknown error"}`
|
|
8065
|
-
);
|
|
8066
|
-
throw error;
|
|
8067
|
-
}
|
|
8068
|
-
},
|
|
8069
|
-
(result) => ({
|
|
7985
|
+
if (exactMatch && (!version2 || version2 === "latest")) {
|
|
7986
|
+
await this.docService.validateLibraryExists(library);
|
|
7987
|
+
const allLibraries = await this.docService.listLibraries();
|
|
7988
|
+
const libraryInfo = allLibraries.find((lib) => lib.library === library);
|
|
7989
|
+
const detailedVersions = libraryInfo ? libraryInfo.versions.map((v) => ({
|
|
7990
|
+
version: v.ref.version,
|
|
7991
|
+
documentCount: v.counts.documents,
|
|
7992
|
+
uniqueUrlCount: v.counts.uniqueUrls,
|
|
7993
|
+
indexedAt: v.indexedAt
|
|
7994
|
+
})) : [];
|
|
7995
|
+
throw new VersionNotFoundError(library, version2 ?? "latest", detailedVersions);
|
|
7996
|
+
}
|
|
7997
|
+
const resolvedVersion = version2 || "latest";
|
|
7998
|
+
logger.info(
|
|
7999
|
+
`🔍 Searching ${library}@${resolvedVersion} for: ${query}${exactMatch ? " (exact match)" : ""}`
|
|
8000
|
+
);
|
|
8001
|
+
try {
|
|
8002
|
+
await this.docService.validateLibraryExists(library);
|
|
8003
|
+
let versionToSearch = resolvedVersion;
|
|
8004
|
+
if (!exactMatch) {
|
|
8005
|
+
const versionResult = await this.docService.findBestVersion(library, version2);
|
|
8006
|
+
versionToSearch = versionResult.bestMatch;
|
|
8007
|
+
}
|
|
8008
|
+
const results = await this.docService.searchStore(
|
|
8070
8009
|
library,
|
|
8071
|
-
|
|
8010
|
+
versionToSearch,
|
|
8072
8011
|
query,
|
|
8073
|
-
limit
|
|
8074
|
-
|
|
8075
|
-
|
|
8076
|
-
}
|
|
8077
|
-
)
|
|
8012
|
+
limit
|
|
8013
|
+
);
|
|
8014
|
+
logger.info(`✅ Found ${results.length} matching results`);
|
|
8015
|
+
return { results };
|
|
8016
|
+
} catch (error) {
|
|
8017
|
+
logger.error(
|
|
8018
|
+
`❌ Search failed: ${error instanceof Error ? error.message : "Unknown error"}`
|
|
8019
|
+
);
|
|
8020
|
+
throw error;
|
|
8021
|
+
}
|
|
8078
8022
|
}
|
|
8079
8023
|
}
|
|
8080
8024
|
function createResponse(text) {
|
|
@@ -8133,6 +8077,17 @@ function createMcpServerInstance(tools, readOnly = false) {
|
|
|
8133
8077
|
// requires internet access
|
|
8134
8078
|
},
|
|
8135
8079
|
async ({ url, library, version: version2, maxPages, maxDepth, scope, followRedirects }) => {
|
|
8080
|
+
analytics.track(TelemetryEvent.TOOL_USED, {
|
|
8081
|
+
tool: "scrape_docs",
|
|
8082
|
+
context: "mcp_server",
|
|
8083
|
+
library,
|
|
8084
|
+
version: version2,
|
|
8085
|
+
url: new URL(url).hostname,
|
|
8086
|
+
// Privacy-safe URL tracking
|
|
8087
|
+
maxPages,
|
|
8088
|
+
maxDepth,
|
|
8089
|
+
scope
|
|
8090
|
+
});
|
|
8136
8091
|
try {
|
|
8137
8092
|
const result = await tools.scrape.execute({
|
|
8138
8093
|
url,
|
|
@@ -8177,6 +8132,15 @@ function createMcpServerInstance(tools, readOnly = false) {
|
|
|
8177
8132
|
destructiveHint: false
|
|
8178
8133
|
},
|
|
8179
8134
|
async ({ library, version: version2, query, limit }) => {
|
|
8135
|
+
analytics.track(TelemetryEvent.TOOL_USED, {
|
|
8136
|
+
tool: "search_docs",
|
|
8137
|
+
context: "mcp_server",
|
|
8138
|
+
library,
|
|
8139
|
+
version: version2,
|
|
8140
|
+
query: query.substring(0, 100),
|
|
8141
|
+
// Truncate query for privacy
|
|
8142
|
+
limit
|
|
8143
|
+
});
|
|
8180
8144
|
try {
|
|
8181
8145
|
const result = await tools.search.execute({
|
|
8182
8146
|
library,
|
|
@@ -8236,6 +8200,10 @@ ${r.content}
|
|
|
8236
8200
|
destructiveHint: false
|
|
8237
8201
|
},
|
|
8238
8202
|
async () => {
|
|
8203
|
+
analytics.track(TelemetryEvent.TOOL_USED, {
|
|
8204
|
+
tool: "list_libraries",
|
|
8205
|
+
context: "mcp_server"
|
|
8206
|
+
});
|
|
8239
8207
|
try {
|
|
8240
8208
|
const result = await tools.listLibraries.execute();
|
|
8241
8209
|
if (result.libraries.length === 0) {
|
|
@@ -8266,6 +8234,12 @@ ${result.libraries.map((lib) => `- ${lib.name}`).join("\n")}`
|
|
|
8266
8234
|
destructiveHint: false
|
|
8267
8235
|
},
|
|
8268
8236
|
async ({ library, targetVersion }) => {
|
|
8237
|
+
analytics.track(TelemetryEvent.TOOL_USED, {
|
|
8238
|
+
tool: "find_version",
|
|
8239
|
+
context: "mcp_server",
|
|
8240
|
+
library,
|
|
8241
|
+
targetVersion
|
|
8242
|
+
});
|
|
8269
8243
|
try {
|
|
8270
8244
|
const message = await tools.findVersion.execute({
|
|
8271
8245
|
library,
|
|
@@ -8295,6 +8269,11 @@ ${result.libraries.map((lib) => `- ${lib.name}`).join("\n")}`
|
|
|
8295
8269
|
destructiveHint: false
|
|
8296
8270
|
},
|
|
8297
8271
|
async ({ status }) => {
|
|
8272
|
+
analytics.track(TelemetryEvent.TOOL_USED, {
|
|
8273
|
+
tool: "list_jobs",
|
|
8274
|
+
context: "mcp_server",
|
|
8275
|
+
status
|
|
8276
|
+
});
|
|
8298
8277
|
try {
|
|
8299
8278
|
const result = await tools.listJobs.execute({
|
|
8300
8279
|
status
|
|
@@ -8333,6 +8312,11 @@ ${formattedJobs}` : "No jobs found."
|
|
|
8333
8312
|
destructiveHint: false
|
|
8334
8313
|
},
|
|
8335
8314
|
async ({ jobId }) => {
|
|
8315
|
+
analytics.track(TelemetryEvent.TOOL_USED, {
|
|
8316
|
+
tool: "get_job_info",
|
|
8317
|
+
context: "mcp_server",
|
|
8318
|
+
jobId
|
|
8319
|
+
});
|
|
8336
8320
|
try {
|
|
8337
8321
|
const result = await tools.getJobInfo.execute({ jobId });
|
|
8338
8322
|
if (!result.job) {
|
|
@@ -8367,6 +8351,11 @@ ${formattedJob}`);
|
|
|
8367
8351
|
destructiveHint: true
|
|
8368
8352
|
},
|
|
8369
8353
|
async ({ jobId }) => {
|
|
8354
|
+
analytics.track(TelemetryEvent.TOOL_USED, {
|
|
8355
|
+
tool: "cancel_job",
|
|
8356
|
+
context: "mcp_server",
|
|
8357
|
+
jobId
|
|
8358
|
+
});
|
|
8370
8359
|
try {
|
|
8371
8360
|
const result = await tools.cancelJob.execute({ jobId });
|
|
8372
8361
|
if (result.success) {
|
|
@@ -8392,6 +8381,12 @@ ${formattedJob}`);
|
|
|
8392
8381
|
destructiveHint: true
|
|
8393
8382
|
},
|
|
8394
8383
|
async ({ library, version: version2 }) => {
|
|
8384
|
+
analytics.track(TelemetryEvent.TOOL_USED, {
|
|
8385
|
+
tool: "remove_docs",
|
|
8386
|
+
context: "mcp_server",
|
|
8387
|
+
library,
|
|
8388
|
+
version: version2
|
|
8389
|
+
});
|
|
8395
8390
|
try {
|
|
8396
8391
|
const result = await tools.remove.execute({ library, version: version2 });
|
|
8397
8392
|
return createResponse(result.message);
|
|
@@ -8418,6 +8413,13 @@ ${formattedJob}`);
|
|
|
8418
8413
|
// requires internet access
|
|
8419
8414
|
},
|
|
8420
8415
|
async ({ url, followRedirects }) => {
|
|
8416
|
+
analytics.track(TelemetryEvent.TOOL_USED, {
|
|
8417
|
+
tool: "fetch_url",
|
|
8418
|
+
context: "mcp_server",
|
|
8419
|
+
url: new URL(url).hostname,
|
|
8420
|
+
// Privacy-safe URL tracking
|
|
8421
|
+
followRedirects
|
|
8422
|
+
});
|
|
8421
8423
|
try {
|
|
8422
8424
|
const result = await tools.fetchUrl.execute({ url, followRedirects });
|
|
8423
8425
|
return createResponse(result);
|
|
@@ -8677,6 +8679,18 @@ function createPipelineRouter(trpc) {
|
|
|
8677
8679
|
input.version ?? null,
|
|
8678
8680
|
input.options
|
|
8679
8681
|
);
|
|
8682
|
+
analytics.track(TelemetryEvent.WEB_SCRAPE_STARTED, {
|
|
8683
|
+
library: input.library,
|
|
8684
|
+
version: input.version || void 0,
|
|
8685
|
+
url: input.options.url,
|
|
8686
|
+
scope: input.options.scope || "subpages",
|
|
8687
|
+
maxDepth: input.options.maxDepth || 3,
|
|
8688
|
+
maxPages: input.options.maxPages || 1e3,
|
|
8689
|
+
maxConcurrency: input.options.maxConcurrency,
|
|
8690
|
+
ignoreErrors: input.options.ignoreErrors,
|
|
8691
|
+
scrapeMode: input.options.scrapeMode,
|
|
8692
|
+
hasCustomHeaders: !!(input.options.headers && Object.keys(input.options.headers).length > 0)
|
|
8693
|
+
});
|
|
8680
8694
|
return { jobId };
|
|
8681
8695
|
}
|
|
8682
8696
|
),
|
|
@@ -8760,6 +8774,13 @@ function createDataRouter(trpc) {
|
|
|
8760
8774
|
input.query,
|
|
8761
8775
|
input.limit ?? 5
|
|
8762
8776
|
);
|
|
8777
|
+
analytics.track(TelemetryEvent.WEB_SEARCH_PERFORMED, {
|
|
8778
|
+
library: input.library,
|
|
8779
|
+
version: input.version || void 0,
|
|
8780
|
+
queryLength: input.query.length,
|
|
8781
|
+
resultCount: results.length,
|
|
8782
|
+
limit: input.limit ?? 5
|
|
8783
|
+
});
|
|
8763
8784
|
return results;
|
|
8764
8785
|
}
|
|
8765
8786
|
),
|
|
@@ -10300,22 +10321,6 @@ async function registerWorkerService(pipeline) {
|
|
|
10300
10321
|
logger.debug(
|
|
10301
10322
|
`Job ${job.id} progress: ${progress.pagesScraped}/${progress.totalPages} pages`
|
|
10302
10323
|
);
|
|
10303
|
-
analytics.track(TelemetryEvent.PIPELINE_JOB_PROGRESS, {
|
|
10304
|
-
jobId: job.id,
|
|
10305
|
-
// Job IDs are already anonymous
|
|
10306
|
-
library: job.library,
|
|
10307
|
-
pagesScraped: progress.pagesScraped,
|
|
10308
|
-
totalPages: progress.totalPages,
|
|
10309
|
-
totalDiscovered: progress.totalDiscovered,
|
|
10310
|
-
progressPercent: Math.round(progress.pagesScraped / progress.totalPages * 100),
|
|
10311
|
-
currentDepth: progress.depth,
|
|
10312
|
-
maxDepth: progress.maxDepth,
|
|
10313
|
-
discoveryRatio: Math.round(
|
|
10314
|
-
progress.totalDiscovered / progress.totalPages * 100
|
|
10315
|
-
),
|
|
10316
|
-
// How much we discovered vs limited total
|
|
10317
|
-
queueEfficiency: progress.totalPages > 0 ? Math.round(progress.pagesScraped / progress.totalPages * 100) : 0
|
|
10318
|
-
});
|
|
10319
10324
|
},
|
|
10320
10325
|
onJobStatusChange: async (job) => {
|
|
10321
10326
|
logger.debug(`Job ${job.id} status changed to: ${job.status}`);
|
|
@@ -11731,9 +11736,9 @@ class DocumentStore {
|
|
|
11731
11736
|
return [...vector, ...new Array(this.dbDimension - vector.length).fill(0)];
|
|
11732
11737
|
}
|
|
11733
11738
|
/**
|
|
11734
|
-
* Initialize the embeddings client using
|
|
11735
|
-
* If no embedding config is provided (null), embeddings will not be initialized.
|
|
11736
|
-
* This allows DocumentStore to be used without embeddings for operations
|
|
11739
|
+
* Initialize the embeddings client using the provided config.
|
|
11740
|
+
* If no embedding config is provided (null or undefined), embeddings will not be initialized.
|
|
11741
|
+
* This allows DocumentStore to be used without embeddings for FTS-only operations.
|
|
11737
11742
|
*
|
|
11738
11743
|
* Environment variables per provider:
|
|
11739
11744
|
* - openai: OPENAI_API_KEY (and optionally OPENAI_API_BASE, OPENAI_ORG_ID)
|
|
@@ -11743,11 +11748,13 @@ class DocumentStore {
|
|
|
11743
11748
|
* - microsoft: Azure OpenAI credentials (AZURE_OPENAI_API_*)
|
|
11744
11749
|
*/
|
|
11745
11750
|
async initializeEmbeddings() {
|
|
11746
|
-
if (this.embeddingConfig === null) {
|
|
11747
|
-
logger.debug(
|
|
11751
|
+
if (this.embeddingConfig === null || this.embeddingConfig === void 0) {
|
|
11752
|
+
logger.debug(
|
|
11753
|
+
"Embedding initialization skipped (no config provided - FTS-only mode)"
|
|
11754
|
+
);
|
|
11748
11755
|
return;
|
|
11749
11756
|
}
|
|
11750
|
-
const config = this.embeddingConfig
|
|
11757
|
+
const config = this.embeddingConfig;
|
|
11751
11758
|
if (!areCredentialsAvailable(config.provider)) {
|
|
11752
11759
|
logger.warn(
|
|
11753
11760
|
`⚠️ No credentials found for ${config.provider} embedding provider. Vector search is disabled.
|
|
@@ -12057,7 +12064,7 @@ class DocumentStore {
|
|
|
12057
12064
|
`;
|
|
12058
12065
|
return `${header}${doc.pageContent}`;
|
|
12059
12066
|
});
|
|
12060
|
-
const maxBatchChars =
|
|
12067
|
+
const maxBatchChars = EMBEDDING_BATCH_CHARS;
|
|
12061
12068
|
const rawEmbeddings = [];
|
|
12062
12069
|
let currentBatch = [];
|
|
12063
12070
|
let currentBatchSize = 0;
|
|
@@ -12533,14 +12540,13 @@ class DocumentManagementService {
|
|
|
12533
12540
|
normalizeVersion(version2) {
|
|
12534
12541
|
return (version2 ?? "").toLowerCase();
|
|
12535
12542
|
}
|
|
12536
|
-
constructor(embeddingConfig, pipelineConfig) {
|
|
12543
|
+
constructor(embeddingConfig, pipelineConfig, storePath) {
|
|
12537
12544
|
let dbPath;
|
|
12538
12545
|
let dbDir;
|
|
12539
|
-
|
|
12540
|
-
|
|
12541
|
-
dbDir = envStorePath;
|
|
12546
|
+
if (storePath) {
|
|
12547
|
+
dbDir = storePath;
|
|
12542
12548
|
dbPath = path.join(dbDir, "documents.db");
|
|
12543
|
-
logger.debug(`Using database directory from
|
|
12549
|
+
logger.debug(`Using database directory from storePath parameter: ${dbDir}`);
|
|
12544
12550
|
} else {
|
|
12545
12551
|
const projectRoot2 = getProjectRoot();
|
|
12546
12552
|
const oldDbDir = path.join(projectRoot2, ".store");
|
|
@@ -12916,41 +12922,72 @@ async function createDocumentManagement(options = {}) {
  await client.initialize();
  return client;
  }
- const service = new DocumentManagementService(
+ const service = new DocumentManagementService(
+ options.embeddingConfig,
+ void 0,
+ options.storePath
+ );
  await service.initialize();
  return service;
  }
- async function createLocalDocumentManagement(embeddingConfig) {
- const service = new DocumentManagementService(embeddingConfig);
+ async function createLocalDocumentManagement(embeddingConfig, storePath) {
+ const service = new DocumentManagementService(embeddingConfig, void 0, storePath);
  await service.initialize();
  return service;
  }
  function createDefaultAction(program) {
  return program.addOption(
- new Option("--protocol <protocol>", "Protocol for MCP server").choices(["auto", "stdio", "http"])
+ new Option("--protocol <protocol>", "Protocol for MCP server").env("DOCS_MCP_PROTOCOL").default("auto").choices(["auto", "stdio", "http"])
  ).addOption(
- new Option("--port <number>", "Port for the server").argParser((v) => {
+ new Option("--port <number>", "Port for the server").env("DOCS_MCP_PORT").env("PORT").default(CLI_DEFAULTS.HTTP_PORT.toString()).argParser((v) => {
  const n = Number(v);
  if (!Number.isInteger(n) || n < 1 || n > 65535) {
  throw new Error("Port must be an integer between 1 and 65535");
  }
  return String(n);
- })
+ })
+ ).addOption(
+ new Option("--host <host>", "Host to bind the server to").env("DOCS_MCP_HOST").env("HOST").default(CLI_DEFAULTS.HOST).argParser(validateHost)
  ).addOption(
- new Option(
+ new Option(
+ "--embedding-model <model>",
+ "Embedding model configuration (e.g., 'openai:text-embedding-3-small')"
+ ).env("DOCS_MCP_EMBEDDING_MODEL")
  ).option("--resume", "Resume interrupted jobs on startup", false).option("--no-resume", "Do not resume jobs on startup").option(
  "--read-only",
  "Run in read-only mode (only expose read tools, disable write/job tools)",
  false
- ).
-
-
-
-
-
-
+ ).addOption(
+ new Option(
+ "--auth-enabled",
+ "Enable OAuth2/OIDC authentication for MCP endpoints"
+ ).env("DOCS_MCP_AUTH_ENABLED").argParser((value) => {
+ if (value === void 0) {
+ return process.env.DOCS_MCP_AUTH_ENABLED === "true" || process.env.DOCS_MCP_AUTH_ENABLED === "1";
+ }
+ return value;
+ }).default(false)
+ ).addOption(
+ new Option(
+ "--auth-issuer-url <url>",
+ "Issuer/discovery URL for OAuth2/OIDC provider"
+ ).env("DOCS_MCP_AUTH_ISSUER_URL")
+ ).addOption(
+ new Option(
+ "--auth-audience <id>",
+ "JWT audience claim (identifies this protected resource)"
+ ).env("DOCS_MCP_AUTH_AUDIENCE")
  ).action(
  async (options) => {
+ await analytics.track(TelemetryEvent.CLI_COMMAND, {
+ command: "default",
+ protocol: options.protocol,
+ port: options.port,
+ host: options.host,
+ resume: options.resume,
+ readOnly: options.readOnly,
+ authEnabled: !!options.authEnabled
+ });
  const resolvedProtocol = resolveProtocol(options.protocol);
  if (resolvedProtocol === "stdio") {
  setLogLevel(LogLevel.ERROR);
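The option definitions above now chain Commander's Option.env() and .default() helpers, so a value given on the command line wins over the environment variable, which in turn wins over the default. A standalone sketch of the same pattern (illustrative only; the "6280" default stands in for CLI_DEFAULTS.HTTP_PORT, and a single env var is used for simplicity):

import { Command, Option } from "commander";

const program = new Command();
program.addOption(
  new Option("--port <number>", "Port for the server")
    .env("DOCS_MCP_PORT") // consulted when --port is absent from the command line
    .default("6280") // used when neither the flag nor the env var is set
    .argParser((v) => {
      const n = Number(v);
      if (!Number.isInteger(n) || n < 1 || n > 65535) {
        throw new Error("Port must be an integer between 1 and 65535");
      }
      return String(n);
    })
);
program.parse(process.argv);
console.log(program.opts().port);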
@@ -12967,9 +13004,13 @@ function createDefaultAction(program) {
  validateAuthConfig(authConfig);
  warnHttpUsage(authConfig, port);
  }
+ const globalOptions = program.parent?.opts() || {};
  ensurePlaywrightBrowsersInstalled();
- const embeddingConfig = resolveEmbeddingContext();
- const docService = await createLocalDocumentManagement(
+ const embeddingConfig = resolveEmbeddingContext(options.embeddingModel);
+ const docService = await createLocalDocumentManagement(
+ embeddingConfig,
+ globalOptions.storePath
+ );
  const pipelineOptions = {
  recoverJobs: options.resume || false,
  // Use --resume flag for job recovery
@@ -13021,6 +13062,13 @@ function createDefaultAction(program) {
  );
  }
  async function fetchUrlAction(url, options) {
+ await analytics.track(TelemetryEvent.CLI_COMMAND, {
+ command: "fetch-url",
+ url,
+ scrapeMode: options.scrapeMode,
+ followRedirects: options.followRedirects,
+ hasHeaders: options.header.length > 0
+ });
  const headers = parseHeaders(options.header);
  const fetchUrlTool = new FetchUrlTool(new HttpFetcher(), new FileFetcher());
  const content = await fetchUrlTool.execute({
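Each CLI action above now opens with an analytics.track(TelemetryEvent.CLI_COMMAND, ...) call whose properties are mostly booleans and counts (for example hasHeaders) rather than raw values. The same pattern could be factored into a small wrapper; this is an illustration only, not code from the bundle, and it assumes an analytics object exposing an async track(event, properties) method:

// Sketch: report a telemetry event for a CLI command, then run its action.
// Commander calls an action handler with the declared arguments first, followed
// by the parsed options and the Command instance.
function withCommandTracking(analytics, event, commandName, buildProperties, action) {
  return async (...args) => {
    const options = args[args.length - 2];
    await analytics.track(event, { command: commandName, ...buildProperties(options) });
    return action(...args);
  };
}

// Hypothetical usage mirroring listAction above:
// program.command("list").action(
//   withCommandTracking(analytics, TelemetryEvent.CLI_COMMAND, "list",
//     (options) => ({ useServerUrl: !!options.serverUrl }), listAction)
// );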
@@ -13057,6 +13105,12 @@ function createFetchUrlCommand(program) {
  ).action(fetchUrlAction);
  }
  async function findVersionAction(library, options) {
+ await analytics.track(TelemetryEvent.CLI_COMMAND, {
+ command: "find-version",
+ library,
+ version: options.version,
+ useServerUrl: !!options.serverUrl
+ });
  const serverUrl = options.serverUrl;
  const docService = await createDocumentManagement({
  serverUrl,
@@ -13081,6 +13135,10 @@ function createFindVersionCommand(program) {
  ).action(findVersionAction);
  }
  async function listAction(options) {
+ await analytics.track(TelemetryEvent.CLI_COMMAND, {
+ command: "list",
+ useServerUrl: !!options.serverUrl
+ });
  const { serverUrl } = options;
  const docService = await createDocumentManagement({
  serverUrl,
@@ -13102,17 +13160,22 @@ function createListCommand(program) {
  }
  function createMcpCommand(program) {
  return program.command("mcp").description("Start MCP server only").addOption(
- new Option("--protocol <protocol>", "Protocol for MCP server").choices(["auto", "stdio", "http"])
+ new Option("--protocol <protocol>", "Protocol for MCP server").env("DOCS_MCP_PROTOCOL").default(CLI_DEFAULTS.PROTOCOL).choices(["auto", "stdio", "http"])
  ).addOption(
- new Option("--port <number>", "Port for the MCP server").argParser((v) => {
+ new Option("--port <number>", "Port for the MCP server").env("DOCS_MCP_PORT").env("PORT").default(CLI_DEFAULTS.HTTP_PORT.toString()).argParser((v) => {
  const n = Number(v);
  if (!Number.isInteger(n) || n < 1 || n > 65535) {
  throw new Error("Port must be an integer between 1 and 65535");
  }
  return String(n);
- })
+ })
  ).addOption(
- new Option("--host <host>", "Host to bind the MCP server to").
+ new Option("--host <host>", "Host to bind the MCP server to").env("DOCS_MCP_HOST").env("HOST").default(CLI_DEFAULTS.HOST).argParser(validateHost)
+ ).addOption(
+ new Option(
+ "--embedding-model <model>",
+ "Embedding model configuration (e.g., 'openai:text-embedding-3-small')"
+ ).env("DOCS_MCP_EMBEDDING_MODEL")
  ).option(
  "--server-url <url>",
  "URL of external pipeline worker RPC (e.g., http://localhost:6280/api)"
@@ -13120,15 +13183,37 @@ function createMcpCommand(program) {
  "--read-only",
  "Run in read-only mode (only expose read tools, disable write/job tools)",
  false
- ).
-
-
-
-
-
-
+ ).addOption(
+ new Option(
+ "--auth-enabled",
+ "Enable OAuth2/OIDC authentication for MCP endpoints"
+ ).env("DOCS_MCP_AUTH_ENABLED").argParser((value) => {
+ if (value === void 0) {
+ return process.env.DOCS_MCP_AUTH_ENABLED === "true" || process.env.DOCS_MCP_AUTH_ENABLED === "1";
+ }
+ return value;
+ }).default(false)
+ ).addOption(
+ new Option(
+ "--auth-issuer-url <url>",
+ "Issuer/discovery URL for OAuth2/OIDC provider"
+ ).env("DOCS_MCP_AUTH_ISSUER_URL")
+ ).addOption(
+ new Option(
+ "--auth-audience <id>",
+ "JWT audience claim (identifies this protected resource)"
+ ).env("DOCS_MCP_AUTH_AUDIENCE")
  ).action(
  async (cmdOptions) => {
+ await analytics.track(TelemetryEvent.CLI_COMMAND, {
+ command: "mcp",
+ protocol: cmdOptions.protocol,
+ port: cmdOptions.port,
+ host: cmdOptions.host,
+ useServerUrl: !!cmdOptions.serverUrl,
+ readOnly: cmdOptions.readOnly,
+ authEnabled: !!cmdOptions.authEnabled
+ });
  const port = validatePort(cmdOptions.port);
  const host = validateHost(cmdOptions.host);
  const serverUrl = cmdOptions.serverUrl;
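The --auth-enabled flag above is a boolean option whose argParser falls back to the DOCS_MCP_AUTH_ENABLED environment variable when no explicit value is present. Pulled out as a standalone helper purely for illustration (the bundle inlines this logic in the Option definition):

// Sketch: treat "true" or "1" in the environment as an opt-in when the CLI flag
// was not provided; otherwise keep whatever value Commander parsed.
function resolveAuthEnabled(value, env = process.env) {
  if (value === undefined) {
    return env.DOCS_MCP_AUTH_ENABLED === "true" || env.DOCS_MCP_AUTH_ENABLED === "1";
  }
  return value;
}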
@@ -13144,8 +13229,9 @@ function createMcpCommand(program) {
  if (authConfig) {
  validateAuthConfig(authConfig);
  }
+ const globalOptions = program.parent?.opts() || {};
  try {
- const embeddingConfig = resolveEmbeddingContext();
+ const embeddingConfig = resolveEmbeddingContext(cmdOptions.embeddingModel);
  if (!serverUrl && !embeddingConfig) {
  logger.error(
  "❌ Embedding configuration is required for local mode. Configure an embedding provider with CLI options or environment variables."
@@ -13154,7 +13240,8 @@ function createMcpCommand(program) {
  }
  const docService = await createDocumentManagement({
  serverUrl,
- embeddingConfig
+ embeddingConfig,
+ storePath: globalOptions.storePath
  });
  const pipelineOptions = {
  recoverJobs: false,
@@ -13216,6 +13303,12 @@ function createMcpCommand(program) {
  );
  }
  async function removeAction(library, options) {
+ await analytics.track(TelemetryEvent.CLI_COMMAND, {
+ command: "remove",
+ library,
+ version: options.version,
+ useServerUrl: !!options.serverUrl
+ });
  const serverUrl = options.serverUrl;
  const docService = await createDocumentManagement({
  serverUrl,
@@ -13244,9 +13337,26 @@ function createRemoveCommand(program) {
  "URL of external pipeline worker RPC (e.g., http://localhost:6280/api)"
  ).action(removeAction);
  }
- async function scrapeAction(library, url, options) {
+ async function scrapeAction(library, url, options, command) {
+ await analytics.track(TelemetryEvent.CLI_COMMAND, {
+ command: "scrape",
+ library,
+ version: options.version,
+ url,
+ maxPages: Number.parseInt(options.maxPages, 10),
+ maxDepth: Number.parseInt(options.maxDepth, 10),
+ maxConcurrency: Number.parseInt(options.maxConcurrency, 10),
+ scope: options.scope,
+ scrapeMode: options.scrapeMode,
+ followRedirects: options.followRedirects,
+ hasHeaders: options.header.length > 0,
+ hasIncludePatterns: options.includePattern.length > 0,
+ hasExcludePatterns: options.excludePattern.length > 0,
+ useServerUrl: !!options.serverUrl
+ });
  const serverUrl = options.serverUrl;
- const
+ const globalOptions = command?.parent?.opts() || {};
+ const embeddingConfig = resolveEmbeddingContext(options.embeddingModel);
  if (!serverUrl && !embeddingConfig) {
  throw new Error(
  "Embedding configuration is required for local scraping. Please set DOCS_MCP_EMBEDDING_MODEL environment variable or use --server-url for remote execution."
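scrapeAction gains a fourth command parameter above because Commander passes the Command instance as the final argument to an action handler; the handler can then reach program-level options such as --store-path through command.parent?.opts(). A self-contained illustration (the option and subcommand names mirror the ones in this diff, but the snippet is not the package's code):

import { Command } from "commander";

const program = new Command();
program.option("--store-path <path>", "Custom path for data storage directory");
program
  .command("scrape")
  .argument("<library>")
  .argument("<url>")
  .option("--server-url <url>", "URL of external pipeline worker RPC")
  .action((library, url, options, command) => {
    // Global options live on the parent program, not on the subcommand.
    const globalOptions = command?.parent?.opts() || {};
    console.log({ library, url, serverUrl: options.serverUrl, storePath: globalOptions.storePath });
  });
program.parse(process.argv);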
@@ -13254,7 +13364,8 @@ async function scrapeAction(library, url, options) {
  }
  const docService = await createDocumentManagement({
  serverUrl,
- embeddingConfig
+ embeddingConfig,
+ storePath: globalOptions.storePath
  });
  let pipeline = null;
  try {
@@ -13356,14 +13467,28 @@ function createScrapeCommand(program) {
  "Custom HTTP header to send with each request (can be specified multiple times)",
  (val, prev = []) => prev.concat([val]),
  []
+ ).addOption(
+ new Option(
+ "--embedding-model <model>",
+ "Embedding model configuration (e.g., 'openai:text-embedding-3-small')"
+ ).env("DOCS_MCP_EMBEDDING_MODEL")
  ).option(
  "--server-url <url>",
  "URL of external pipeline worker RPC (e.g., http://localhost:6280/api)"
  ).action(scrapeAction);
  }
  async function searchAction(library, query, options) {
+ await analytics.track(TelemetryEvent.CLI_COMMAND, {
+ command: "search",
+ library,
+ version: options.version,
+ query,
+ limit: Number.parseInt(options.limit, 10),
+ exactMatch: options.exactMatch,
+ useServerUrl: !!options.serverUrl
+ });
  const serverUrl = options.serverUrl;
- const embeddingConfig = resolveEmbeddingContext();
+ const embeddingConfig = resolveEmbeddingContext(options.embeddingModel);
  if (!serverUrl && !embeddingConfig) {
  throw new Error(
  "Embedding configuration is required for local search. Please set DOCS_MCP_EMBEDDING_MODEL environment variable or use --server-url for remote execution."
@@ -13393,139 +13518,185 @@ function createSearchCommand(program) {
  ).option(
  "-v, --version <string>",
  "Version of the library (optional, supports ranges)"
- ).option("-l, --limit <number>", "Maximum number of results", "5").option("-e, --exact-match", "Only use exact version match (default: false)", false).
+ ).option("-l, --limit <number>", "Maximum number of results", "5").option("-e, --exact-match", "Only use exact version match (default: false)", false).addOption(
+ new Option(
+ "--embedding-model <model>",
+ "Embedding model configuration (e.g., 'openai:text-embedding-3-small')"
+ ).env("DOCS_MCP_EMBEDDING_MODEL")
+ ).option(
  "--server-url <url>",
  "URL of external pipeline worker RPC (e.g., http://localhost:6280/api)"
  ).action(searchAction);
  }
  function createWebCommand(program) {
  return program.command("web").description("Start web interface only").addOption(
- new Option("--port <number>", "Port for the web interface").argParser((v) => {
+ new Option("--port <number>", "Port for the web interface").env("DOCS_MCP_WEB_PORT").env("DOCS_MCP_PORT").env("PORT").default(CLI_DEFAULTS.WEB_PORT.toString()).argParser((v) => {
  const n = Number(v);
  if (!Number.isInteger(n) || n < 1 || n > 65535) {
  throw new Error("Port must be an integer between 1 and 65535");
  }
  return String(n);
- })
+ })
+ ).addOption(
+ new Option("--host <host>", "Host to bind the web interface to").env("DOCS_MCP_HOST").env("HOST").default(CLI_DEFAULTS.HOST).argParser(validateHost)
  ).addOption(
- new Option(
+ new Option(
+ "--embedding-model <model>",
+ "Embedding model configuration (e.g., 'openai:text-embedding-3-small')"
+ ).env("DOCS_MCP_EMBEDDING_MODEL")
  ).option(
  "--server-url <url>",
  "URL of external pipeline worker RPC (e.g., http://localhost:6280/api)"
- ).action(
-
-
-
-
-
-
-
-
+ ).action(
+ async (cmdOptions) => {
+ await analytics.track(TelemetryEvent.CLI_COMMAND, {
+ command: "web",
+ port: cmdOptions.port,
+ host: cmdOptions.host,
+ useServerUrl: !!cmdOptions.serverUrl
+ });
+ const port = validatePort(cmdOptions.port);
+ const host = validateHost(cmdOptions.host);
+ const serverUrl = cmdOptions.serverUrl;
+ try {
+ const embeddingConfig = resolveEmbeddingContext(cmdOptions.embeddingModel);
+ if (!serverUrl && !embeddingConfig) {
+ logger.error(
+ "❌ Embedding configuration is required for local mode. Configure an embedding provider with CLI options or environment variables."
+ );
+ process.exit(1);
+ }
+ const docService = await createDocumentManagement({
+ serverUrl,
+ embeddingConfig
+ });
+ const pipelineOptions = {
+ recoverJobs: false,
+ // Web command doesn't support job recovery
+ serverUrl,
+ concurrency: 3
+ };
+ const pipeline = await createPipelineWithCallbacks(
+ serverUrl ? void 0 : docService,
+ pipelineOptions
  );
+ const config = createAppServerConfig({
+ enableWebInterface: true,
+ enableMcpServer: false,
+ enableApiServer: false,
+ enableWorker: !serverUrl,
+ port,
+ host,
+ externalWorkerUrl: serverUrl,
+ startupContext: {
+ cliCommand: "web"
+ }
+ });
+ logger.info(
+ `🚀 Starting web interface${serverUrl ? ` connecting to worker at ${serverUrl}` : ""}`
+ );
+ const appServer = await startAppServer(docService, pipeline, config);
+ registerGlobalServices({
+ appServer,
+ docService
+ // pipeline is owned by AppServer - don't register globally to avoid double shutdown
+ });
+ await new Promise(() => {
+ });
+ } catch (error) {
+ logger.error(`❌ Failed to start web interface: ${error}`);
  process.exit(1);
  }
- const docService = await createDocumentManagement({
- serverUrl,
- embeddingConfig
- });
- const pipelineOptions = {
- recoverJobs: false,
- // Web command doesn't support job recovery
- serverUrl,
- concurrency: 3
- };
- const pipeline = await createPipelineWithCallbacks(
- serverUrl ? void 0 : docService,
- pipelineOptions
- );
- const config = createAppServerConfig({
- enableWebInterface: true,
- enableMcpServer: false,
- enableApiServer: false,
- enableWorker: !serverUrl,
- port,
- host,
- externalWorkerUrl: serverUrl,
- startupContext: {
- cliCommand: "web"
- }
- });
- logger.info(
- `🚀 Starting web interface${serverUrl ? ` connecting to worker at ${serverUrl}` : ""}`
- );
- const appServer = await startAppServer(docService, pipeline, config);
- registerGlobalServices({
- appServer,
- docService
- // pipeline is owned by AppServer - don't register globally to avoid double shutdown
- });
- await new Promise(() => {
- });
- } catch (error) {
- logger.error(`❌ Failed to start web interface: ${error}`);
- process.exit(1);
  }
-
+ );
  }
  function createWorkerCommand(program) {
  return program.command("worker").description("Start external pipeline worker (HTTP API)").addOption(
- new Option("--port <number>", "Port for worker API").argParser((v) => {
+ new Option("--port <number>", "Port for worker API").env("DOCS_MCP_PORT").env("PORT").default("8080").argParser((v) => {
  const n = Number(v);
  if (!Number.isInteger(n) || n < 1 || n > 65535) {
  throw new Error("Port must be an integer between 1 and 65535");
  }
  return String(n);
- })
+ })
  ).addOption(
- new Option("--host <host>", "Host to bind the worker API to").
- ).
-
-
-
-
-
-
-
-
-
-
-
- };
- const pipeline = await createPipelineWithCallbacks(docService, pipelineOptions);
- const config = createAppServerConfig({
- enableWebInterface: false,
- enableMcpServer: false,
- enableApiServer: true,
- enableWorker: true,
- port,
- host,
- startupContext: {
- cliCommand: "worker"
- }
- });
- const appServer = await startAppServer(docService, pipeline, config);
- registerGlobalServices({
- appServer,
- docService
- // pipeline is owned by AppServer - don't register globally to avoid double shutdown
- });
- await new Promise(() => {
+ new Option("--host <host>", "Host to bind the worker API to").env("DOCS_MCP_HOST").env("HOST").default(CLI_DEFAULTS.HOST).argParser(validateHost)
+ ).addOption(
+ new Option(
+ "--embedding-model <model>",
+ "Embedding model configuration (e.g., 'openai:text-embedding-3-small')"
+ ).env("DOCS_MCP_EMBEDDING_MODEL")
+ ).option("--resume", "Resume interrupted jobs on startup", true).option("--no-resume", "Do not resume jobs on startup").action(
+ async (cmdOptions) => {
+ await analytics.track(TelemetryEvent.CLI_COMMAND, {
+ command: "worker",
+ port: cmdOptions.port,
+ host: cmdOptions.host,
+ resume: cmdOptions.resume
  });
-
-
-
+ const port = validatePort(cmdOptions.port);
+ const host = validateHost(cmdOptions.host);
+ try {
+ logger.info(`🚀 Starting external pipeline worker on port ${port}`);
+ ensurePlaywrightBrowsersInstalled();
+ const embeddingConfig = resolveEmbeddingContext(cmdOptions.embeddingModel);
+ const docService = await createLocalDocumentManagement(embeddingConfig);
+ const pipelineOptions = {
+ recoverJobs: cmdOptions.resume,
+ // Use the resume option
+ concurrency: CLI_DEFAULTS.MAX_CONCURRENCY
+ };
+ const pipeline = await createPipelineWithCallbacks(docService, pipelineOptions);
+ const config = createAppServerConfig({
+ enableWebInterface: false,
+ enableMcpServer: false,
+ enableApiServer: true,
+ enableWorker: true,
+ port,
+ host,
+ startupContext: {
+ cliCommand: "worker"
+ }
+ });
+ const appServer = await startAppServer(docService, pipeline, config);
+ registerGlobalServices({
+ appServer,
+ docService
+ // pipeline is owned by AppServer - don't register globally to avoid double shutdown
+ });
+ await new Promise(() => {
+ });
+ } catch (error) {
+ logger.error(`❌ Failed to start external pipeline worker: ${error}`);
+ process.exit(1);
+ }
  }
-
+ );
  }
  function createCliProgram() {
  const program = new Command();
  const commandStartTimes = /* @__PURE__ */ new Map();
  program.name("docs-mcp-server").description("Unified CLI, MCP Server, and Web Interface for Docs MCP Server.").version(packageJson.version).addOption(
  new Option("--verbose", "Enable verbose (debug) logging").conflicts("silent")
- ).addOption(new Option("--silent", "Disable all logging except errors")).addOption(
+ ).addOption(new Option("--silent", "Disable all logging except errors")).addOption(
+ new Option("--telemetry", "Enable telemetry collection").env("DOCS_MCP_TELEMETRY").argParser((value) => {
+ if (value === void 0) {
+ return process.env.DOCS_MCP_TELEMETRY !== "false" && process.env.DOCS_MCP_TELEMETRY !== "0";
+ }
+ return value;
+ }).default(true)
+ ).addOption(new Option("--no-telemetry", "Disable telemetry collection")).addOption(
+ new Option("--store-path <path>", "Custom path for data storage directory").env(
+ "DOCS_MCP_STORE_PATH"
+ )
+ ).enablePositionalOptions().allowExcessArguments(false).showHelpAfterError(true);
  program.hook("preAction", async (thisCommand, actionCommand) => {
  const globalOptions = thisCommand.opts();
  setupLogging(globalOptions);
+ initTelemetry({
+ enabled: globalOptions.telemetry ?? true,
+ storePath: globalOptions.storePath
+ });
  if (shouldEnableTelemetry()) {
  if (analytics.isEnabled()) {
  analytics.setGlobalContext({
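The program-level options added above pair a --telemetry flag (with a DOCS_MCP_TELEMETRY fallback) with a --no-telemetry negation and a --store-path value that is later handed to initTelemetry and the document store. A reduced sketch of how that flag pair resolves in Commander (illustrative only; the env fallback parsing and initTelemetry call are simplified away):

import { Command, Option } from "commander";

const program = new Command();
program
  .addOption(new Option("--telemetry", "Enable telemetry collection").default(true))
  .addOption(new Option("--no-telemetry", "Disable telemetry collection"))
  .addOption(new Option("--store-path <path>", "Custom path for data storage directory").env("DOCS_MCP_STORE_PATH"));
program.parse(process.argv);

const opts = program.opts();
// --no-telemetry flips the shared "telemetry" key to false; otherwise the default (true) applies.
console.log({ telemetry: opts.telemetry, storePath: opts.storePath });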
@@ -13539,8 +13710,6 @@ function createCliProgram() {
  commandStartTimes.set(commandKey, Date.now());
  actionCommand._trackingKey = commandKey;
  }
- } else {
- TelemetryConfig.getInstance().disable();
  }
  });
  program.hook("postAction", async (_thisCommand, actionCommand) => {