@arabold/docs-mcp-server 2.1.1 → 2.2.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,7 @@
-- Migration: Create metadata table for tracking global configuration state.
-- Used to persist the active embedding model and vector dimension so the system
-- can detect configuration changes between startups and prevent silent data corruption.
--
-- `key` is declared NOT NULL explicitly: on an ordinary (rowid) SQLite table a
-- non-INTEGER PRIMARY KEY does not imply NOT NULL, so NULL keys would otherwise
-- be accepted and could never be matched by a `WHERE key = ?` lookup.
CREATE TABLE IF NOT EXISTS metadata (
  key TEXT PRIMARY KEY NOT NULL,
  value TEXT NOT NULL
);
package/dist/index.js CHANGED
@@ -4,8 +4,8 @@ import { s as sanitizeEnvironment, l as logger } from "./logger-CLtABTNb.js";
4
4
  process.setSourceMapsEnabled(true);
5
5
  sanitizeEnvironment();
6
6
  const [{ runCli }, { ensurePlaywrightBrowsersInstalled }] = await Promise.all([
7
- import("./main-ntnRQ8Za.js"),
8
- import("./utils-CZz1DsHw.js").then((n) => n.u)
7
+ import("./main-C6In2ps7.js"),
8
+ import("./utils-ntayxPjd.js").then((n) => n.u)
9
9
  ]);
10
10
  ensurePlaywrightBrowsersInstalled();
11
11
  runCli().catch((error) => {
@@ -9,7 +9,7 @@ import path from "node:path";
9
9
  import { URL as URL$1 } from "node:url";
10
10
  import envPaths from "env-paths";
11
11
  import { l as logger, n as normalizeEnvValue, g as getLogLevelFromEnv, a as setLogLevel, L as LogLevel } from "./logger-CLtABTNb.js";
12
- import { t as telemetry, T as TelemetryEvent, P as PipelineJobStatus, g as getProjectRoot, E as EmbeddingConfig, a as EventType, S as ServerEventName, s as shouldEnableTelemetry, r as resolveProtocol, p as parseAuthConfig, v as validateAuthConfig, w as warnHttpUsage, e as ensurePlaywrightBrowsersInstalled, b as getEventBus, c as createAppServerConfig, d as parseHeaders, f as validatePort, h as validateHost, i as resolveStorePath, j as initTelemetry, k as EventBusService, l as TelemetryService } from "./utils-CZz1DsHw.js";
12
+ import { t as telemetry, T as TelemetryEvent, P as PipelineJobStatus, g as getProjectRoot, E as EmbeddingConfig, a as EventType, S as ServerEventName, s as shouldEnableTelemetry, r as resolveProtocol, p as parseAuthConfig, v as validateAuthConfig, w as warnHttpUsage, e as ensurePlaywrightBrowsersInstalled, b as getEventBus, h as handleEmbeddingModelChange, c as createAppServerConfig, d as parseHeaders, f as validatePort, i as validateHost, j as resolveStorePath, k as initTelemetry, l as EventBusService, m as TelemetryService } from "./utils-ntayxPjd.js";
13
13
  import yargs from "yargs";
14
14
  import { hideBin } from "yargs/helpers";
15
15
  import yaml from "yaml";
@@ -117,6 +117,23 @@ class DimensionError extends StoreError {
117
117
  }
118
118
  class ConnectionError extends StoreError {
119
119
  }
/**
 * Store error describing a difference between the previously recorded
 * embedding model/dimension and the currently configured one.
 *
 * The four constructor arguments are kept on the instance so a caller can
 * inspect them after catching the error.
 *
 * NOTE(review): the message lines below are joined exactly as they appear in
 * this listing; any leading whitespace inside the original literal is not
 * visible here — confirm against the shipped bundle.
 */
class EmbeddingModelChangedError extends StoreError {
  /**
   * @param previousModel     Model identifier read from stored metadata.
   * @param previousDimension Dimension read from stored metadata.
   * @param currentModel      Model identifier from the active configuration.
   * @param currentDimension  Dimension from the active configuration.
   */
  constructor(previousModel, previousDimension, currentModel, currentDimension) {
    const messageLines = [
      "Embedding model change detected:",
      `Previous: ${previousModel} (${previousDimension} dimensions)`,
      `Current: ${currentModel} (${currentDimension} dimensions)`,
      "",
      "All existing vectors are incompatible and must be invalidated.",
      "To confirm this change, start the server interactively (with a TTY connected)",
      "and follow the prompts."
    ];
    super(messageLines.join("\n"));
    this.previousModel = previousModel;
    this.previousDimension = previousDimension;
    this.currentModel = currentModel;
    this.currentDimension = currentDimension;
  }
}
120
137
  class MissingCredentialsError extends StoreError {
121
138
  constructor(provider, missingCredentials) {
122
139
  super(
@@ -456,7 +473,7 @@ const AppConfigSchema = z.object({
456
473
  batchChars: z.coerce.number().int().default(DEFAULT_CONFIG.embeddings.batchChars),
457
474
  requestTimeoutMs: z.coerce.number().int().default(DEFAULT_CONFIG.embeddings.requestTimeoutMs),
458
475
  initTimeoutMs: z.coerce.number().int().default(DEFAULT_CONFIG.embeddings.initTimeoutMs),
459
- vectorDimension: z.coerce.number().int().default(DEFAULT_CONFIG.embeddings.vectorDimension)
476
+ vectorDimension: z.coerce.number().int().min(1, "embedding dimension must be at least 1").default(DEFAULT_CONFIG.embeddings.vectorDimension)
460
477
  }).default(DEFAULT_CONFIG.embeddings),
461
478
  db: z.object({
462
479
  migrationMaxRetries: z.coerce.number().int().default(DEFAULT_CONFIG.db.migrationMaxRetries),
@@ -1963,6 +1980,17 @@ class ChallengeError extends ScraperError {
1963
1980
  this.challengeType = challengeType;
1964
1981
  }
1965
1982
  }
/**
 * Scraper error raised when a request fails because the remote TLS
 * certificate could not be validated.
 *
 * Exposes the offending `url` and the low-level error `code` (if any) as
 * instance properties.
 */
class TlsCertificateError extends ScraperError {
  /**
   * @param url   URL whose certificate failed validation.
   * @param code  Optional error code; appended to the message in parentheses when truthy.
   * @param cause Optional underlying error, forwarded to ScraperError.
   */
  constructor(url, code, cause) {
    const codeSuffix = code ? ` (${code})` : "";
    const message = `TLS certificate validation failed for ${url}${codeSuffix}. The remote site may have an incomplete or untrusted certificate chain.`;
    // Second argument is `false`; presumably ScraperError's "retryable" flag —
    // confirm against the ScraperError constructor.
    super(message, false, cause);
    this.url = url;
    this.code = code;
  }
}
1966
1994
  class MimeTypeUtils {
1967
1995
  /**
1968
1996
  * Parses a Content-Type header string into its MIME type and charset.
@@ -2691,6 +2719,15 @@ class HttpFetcher {
2691
2719
  "EPERM"
2692
2720
  // Operation not permitted
2693
2721
  ];
/**
 * Error codes that isTlsCertificateError() treats as TLS certificate
 * validation failures. Descriptions follow the Node.js TLS documentation.
 */
tlsCertificateErrorCodes = [
  // Certificate validity period is over.
  "CERT_HAS_EXPIRED",
  // Leaf certificate is self-signed.
  "DEPTH_ZERO_SELF_SIGNED_CERT",
  // Host name / IP does not match the certificate's alternative names.
  "ERR_TLS_CERT_ALTNAME_INVALID",
  // A self-signed certificate appears somewhere in the chain.
  "SELF_SIGNED_CERT_IN_CHAIN",
  // Issuer certificate could not be obtained.
  "UNABLE_TO_GET_ISSUER_CERT",
  // Issuer certificate could not be found in the local trust store.
  "UNABLE_TO_GET_ISSUER_CERT_LOCALLY",
  // The first (leaf) certificate could not be verified.
  "UNABLE_TO_VERIFY_LEAF_SIGNATURE"
];
2694
2731
  fingerprintGenerator;
2695
2732
  constructor(scraperConfig) {
2696
2733
  this.maxRetriesDefault = scraperConfig.fetcher.maxRetries;
@@ -2703,6 +2740,9 @@ class HttpFetcher {
2703
2740
  async delay(ms) {
2704
2741
  return new Promise((resolve) => setTimeout(resolve, ms));
2705
2742
  }
2743
+ isTlsCertificateError(code) {
2744
+ return code ? this.tlsCertificateErrorCodes.includes(code) : false;
2745
+ }
2706
2746
  async fetch(source, options) {
2707
2747
  const maxRetries = options?.maxRetries ?? this.maxRetriesDefault;
2708
2748
  const baseDelay = options?.retryDelay ?? this.baseDelayDefaultMs;
@@ -2799,6 +2839,7 @@ class HttpFetcher {
2799
2839
  const axiosError = error;
2800
2840
  const status = axiosError.response?.status;
2801
2841
  const code = axiosError.code;
2842
+ const errorCause = error instanceof Error ? error : void 0;
2802
2843
  if (options?.signal?.aborted || code === "ERR_CANCELED") {
2803
2844
  throw new CancellationError("HTTP fetch cancelled");
2804
2845
  }
@@ -2838,6 +2879,9 @@ class HttpFetcher {
2838
2879
  throw new ChallengeError(source, status, "cloudflare");
2839
2880
  }
2840
2881
  }
2882
+ if (this.isTlsCertificateError(code)) {
2883
+ throw new TlsCertificateError(source, code, errorCause);
2884
+ }
2841
2885
  if (attempt < maxRetries && (status === void 0 || this.retryableStatusCodes.includes(status)) && !this.nonRetryableErrorCodes.includes(code ?? "")) {
2842
2886
  const delay = baseDelay * 2 ** attempt;
2843
2887
  logger.warn(
@@ -2849,7 +2893,7 @@ class HttpFetcher {
2849
2893
  throw new ScraperError(
2850
2894
  `Failed to fetch ${source} after ${attempt + 1} attempts: ${axiosError.message ?? "Unknown error"}`,
2851
2895
  true,
2852
- error instanceof Error ? error : void 0
2896
+ errorCause
2853
2897
  );
2854
2898
  }
2855
2899
  }
@@ -2894,6 +2938,12 @@ class AutoDetectFetcher {
2894
2938
  );
2895
2939
  return this.browserFetcher.fetch(source, options);
2896
2940
  }
2941
+ if (error instanceof TlsCertificateError) {
2942
+ logger.info(
2943
+ `🔄 TLS certificate validation failed for ${source}, falling back to browser fetcher...`
2944
+ );
2945
+ return this.browserFetcher.fetch(source, options);
2946
+ }
2897
2947
  throw error;
2898
2948
  }
2899
2949
  }
@@ -9183,6 +9233,91 @@ class DocumentStore {
9183
9233
  }
9184
9234
  return [...vector, ...new Array(this.dbDimension - vector.length).fill(0)];
9185
9235
  }
9236
+ /**
9237
+ * Reads the stored embedding model and dimension from the metadata table.
9238
+ * Returns null for each value that doesn't exist (e.g., first run or pre-metadata database).
9239
+ */
9240
+ getEmbeddingMetadata() {
9241
+ const modelRow = this.db.prepare("SELECT value FROM metadata WHERE key = ?").get("embedding_model");
9242
+ const dimensionRow = this.db.prepare("SELECT value FROM metadata WHERE key = ?").get("embedding_dimension");
9243
+ return {
9244
+ model: modelRow?.value ?? null,
9245
+ dimension: dimensionRow?.value ?? null
9246
+ };
9247
+ }
9248
+ /**
9249
+ * Persists the active embedding model and dimension to the metadata table.
9250
+ * Uses upsert (INSERT ON CONFLICT UPDATE) so it works for both first-run and subsequent updates.
9251
+ * Uses inline db.prepare() so this method works before prepareStatements() is called.
9252
+ */
9253
+ setEmbeddingMetadata(model, dimension) {
9254
+ const stmt = this.db.prepare(
9255
+ "INSERT INTO metadata (key, value) VALUES (?, ?) ON CONFLICT(key) DO UPDATE SET value = excluded.value"
9256
+ );
9257
+ stmt.run("embedding_model", model);
9258
+ stmt.run("embedding_dimension", String(dimension));
9259
+ }
9260
+ /**
9261
+ * Compares the configured embedding model and dimension against stored metadata.
9262
+ * Throws EmbeddingModelChangedError if either has changed since the last run.
9263
+ *
9264
+ * Skipped when:
9265
+ * - No metadata exists (first run / upgrade from pre-metadata DB → silent initialization)
9266
+ * - No embedding model is configured (FTS-only mode)
9267
+ * - Credentials are unavailable for the configured provider (will fall back to FTS-only)
9268
+ */
9269
+ checkEmbeddingModelChange() {
9270
+ if (!this.embeddingConfig) {
9271
+ return;
9272
+ }
9273
+ if (!areCredentialsAvailable(this.embeddingConfig.provider)) {
9274
+ return;
9275
+ }
9276
+ const stored = this.getEmbeddingMetadata();
9277
+ if (stored.model === null) {
9278
+ return;
9279
+ }
9280
+ const currentModel = this.config.app.embeddingModel;
9281
+ const currentDimension = String(this.config.embeddings.vectorDimension);
9282
+ const modelChanged = stored.model !== currentModel;
9283
+ const dimensionChanged = stored.dimension !== null && stored.dimension !== currentDimension;
9284
+ if (modelChanged || dimensionChanged) {
9285
+ throw new EmbeddingModelChangedError(
9286
+ stored.model,
9287
+ stored.dimension ?? "unknown",
9288
+ currentModel,
9289
+ currentDimension
9290
+ );
9291
+ }
9292
+ }
9293
+ /**
9294
+ * Invalidates all existing embedding vectors after a confirmed model/dimension change.
9295
+ * Sets all document embeddings to NULL, drops and recreates the vec table as empty,
9296
+ * and updates the metadata with the new model and dimension.
9297
+ *
9298
+ * After invalidation, FTS search continues working; vector search returns no results
9299
+ * until libraries are re-scraped.
9300
+ */
9301
+ invalidateAllVectors(newModel, newDimension) {
9302
+ logger.warn(
9303
+ `⚠️ Invalidating all embedding vectors due to model/dimension change.
9304
+ All libraries must be re-scraped to restore vector search.
9305
+ Full-text search remains available for all existing documents.`
9306
+ );
9307
+ this.db.exec("UPDATE documents SET embedding = NULL");
9308
+ this.db.exec("DROP TABLE IF EXISTS documents_vec");
9309
+ this.db.exec(`
9310
+ CREATE VIRTUAL TABLE documents_vec USING vec0(
9311
+ library_id INTEGER NOT NULL,
9312
+ version_id INTEGER NOT NULL,
9313
+ embedding FLOAT[${newDimension}]
9314
+ );
9315
+ `);
9316
+ this.setEmbeddingMetadata(newModel, newDimension);
9317
+ logger.info(
9318
+ `✅ Embedding vectors invalidated. Metadata updated to: ${newModel} (${newDimension}d)`
9319
+ );
9320
+ }
9186
9321
  /**
9187
9322
  * Initialize the embeddings client using the provided config.
9188
9323
  * If no embedding config is provided (null or undefined), embeddings will not be initialized.
@@ -9251,6 +9386,7 @@ class DocumentStore {
9251
9386
  logger.debug(
9252
9387
  `Embeddings initialized: ${config.provider}:${config.model} (${this.modelDimension}d)`
9253
9388
  );
9389
+ this.setEmbeddingMetadata(config.modelSpec, this.dbDimension);
9254
9390
  } catch (error) {
9255
9391
  if (error instanceof Error) {
9256
9392
  if (error.message.includes("does not exist") || error.message.includes("MODEL_NOT_FOUND")) {
@@ -9356,6 +9492,8 @@ class DocumentStore {
9356
9492
  maxRetries: this.config.db.migrationMaxRetries,
9357
9493
  retryDelayMs: this.config.db.migrationRetryDelayMs
9358
9494
  });
9495
+ this.checkEmbeddingModelChange();
9496
+ this.ensureVectorTable();
9359
9497
  this.prepareStatements();
9360
9498
  await this.initializeEmbeddings();
9361
9499
  } catch (error) {
@@ -9365,12 +9503,66 @@ class DocumentStore {
9365
9503
  throw new ConnectionError("Failed to initialize database connection", error);
9366
9504
  }
9367
9505
  }
9506
+ /**
9507
+ * Resolves a model change by invalidating all vectors and completing initialization.
9508
+ * Called by the CLI layer after the user confirms a model/dimension change.
9509
+ * Assumes initialize() was previously called and threw EmbeddingModelChangedError,
9510
+ * meaning steps 1-2 (load extensions, apply migrations) completed successfully.
9511
+ * Steps 3+ (ensureVectorTable, prepareStatements, initializeEmbeddings) are
9512
+ * completed here after invalidation.
9513
+ */
9514
+ async resolveModelChange() {
9515
+ const currentModel = this.config.app.embeddingModel;
9516
+ const currentDimension = this.config.embeddings.vectorDimension;
9517
+ this.invalidateAllVectors(currentModel, currentDimension);
9518
+ this.ensureVectorTable();
9519
+ this.prepareStatements();
9520
+ await this.initializeEmbeddings();
9521
+ }
9368
9522
  /**
9369
9523
  * Gracefully closes database connections
9370
9524
  */
9371
9525
  async shutdown() {
9372
9526
  this.db.close();
9373
9527
  }
9528
+ /**
9529
+ * Creates or reconciles the documents_vec virtual table with configurable dimension.
9530
+ * Called after migrations and model change detection. The table is initially created
9531
+ * by migration 003 with a fixed 1536 dimension; this method reconciles it at runtime
9532
+ * if the configured dimension differs.
9533
+ * Idempotent: if the table already exists with the same dimension, no-op; if dimension
9534
+ * changed in config, drops and recreates so any embedding provider (e.g. 1536 or 3584) works.
9535
+ *
9536
+ * Note: No backfill of existing embeddings is performed. Vectors are populated during
9537
+ * scraping, not at startup. Old vectors from a different dimension or model are incompatible
9538
+ * and are handled by the model change detection system (checkEmbeddingModelChange).
9539
+ */
9540
+ ensureVectorTable() {
9541
+ const dim = this.config.embeddings.vectorDimension;
9542
+ if (typeof dim !== "number" || !Number.isInteger(dim) || dim < 1) {
9543
+ throw new StoreError(
9544
+ `Invalid embeddings.vectorDimension: ${dim}. Must be a positive integer.`
9545
+ );
9546
+ }
9547
+ const existingSql = this.db.prepare(
9548
+ "SELECT sql FROM sqlite_master WHERE type = 'table' AND name = 'documents_vec';"
9549
+ ).get();
9550
+ if (existingSql) {
9551
+ const match = existingSql.sql.match(/embedding\s+FLOAT\s*\[\s*(\d+)\s*]/i);
9552
+ const existingDim = match ? Number(match[1]) : null;
9553
+ if (existingDim === dim) {
9554
+ return;
9555
+ }
9556
+ this.db.exec("DROP TABLE documents_vec;");
9557
+ }
9558
+ this.db.exec(`
9559
+ CREATE VIRTUAL TABLE documents_vec USING vec0(
9560
+ library_id INTEGER NOT NULL,
9561
+ version_id INTEGER NOT NULL,
9562
+ embedding FLOAT[${dim}]
9563
+ );
9564
+ `);
9565
+ }
9374
9566
  /**
9375
9567
  * Resolves a library name and version string to version_id.
9376
9568
  * Creates library and version records if they don't exist.
@@ -10287,6 +10479,13 @@ class DocumentManagementService {
10287
10479
  async initialize() {
10288
10480
  await this.store.initialize();
10289
10481
  }
10482
+ /**
10483
+ * Resolves a confirmed embedding model change by invalidating all vectors
10484
+ * and completing the initialization that was interrupted by EmbeddingModelChangedError.
10485
+ */
10486
+ async resolveModelChange() {
10487
+ await this.store.resolveModelChange();
10488
+ }
10290
10489
  /**
10291
10490
  * Shuts down the underlying document store and cleans up pipeline resources.
10292
10491
  */
@@ -11461,7 +11660,7 @@ const Layout = ({
11461
11660
  children,
11462
11661
  eventClientConfig
11463
11662
  }) => {
11464
- const versionString = version || "2.1.1";
11663
+ const versionString = version || "2.2.1";
11465
11664
  const versionInitializer = `versionUpdate({ currentVersion: ${`'${versionString}'`} })`;
11466
11665
  return /* @__PURE__ */ jsxs("html", { lang: "en", children: [
11467
11666
  /* @__PURE__ */ jsxs("head", { children: [
@@ -13813,7 +14012,7 @@ class AppServer {
13813
14012
  try {
13814
14013
  if (telemetry.isEnabled()) {
13815
14014
  telemetry.setGlobalContext({
13816
- appVersion: "2.1.1",
14015
+ appVersion: "2.2.1",
13817
14016
  appPlatform: process.platform,
13818
14017
  appNodeVersion: process.version,
13819
14018
  appServicesEnabled: this.getActiveServicesList(),
@@ -17007,7 +17206,16 @@ function createDefaultAction(cli) {
17007
17206
  }
17008
17207
  ensurePlaywrightBrowsersInstalled();
17009
17208
  const eventBus = getEventBus(argv);
17010
- const docService = await createLocalDocumentManagement(eventBus, appConfig);
17209
+ const docService = new DocumentManagementService(eventBus, appConfig);
17210
+ try {
17211
+ await docService.initialize();
17212
+ } catch (error) {
17213
+ if (error instanceof EmbeddingModelChangedError) {
17214
+ await handleEmbeddingModelChange(error, docService);
17215
+ } else {
17216
+ throw error;
17217
+ }
17218
+ }
17011
17219
  const pipelineOptions = {
17012
17220
  recoverJobs: argv.resume || false,
17013
17221
  appConfig
@@ -17290,11 +17498,24 @@ function createMcpCommand(cli) {
17290
17498
  try {
17291
17499
  const serverUrl = argv.serverUrl;
17292
17500
  const eventBus = getEventBus(argv);
17293
- const docService = await createDocumentManagement({
17294
- serverUrl,
17295
- eventBus,
17296
- appConfig
17297
- });
17501
+ let docService;
17502
+ if (serverUrl) {
17503
+ const client = new DocumentManagementClient(serverUrl);
17504
+ await client.initialize();
17505
+ docService = client;
17506
+ } else {
17507
+ const service = new DocumentManagementService(eventBus, appConfig);
17508
+ try {
17509
+ await service.initialize();
17510
+ } catch (error) {
17511
+ if (error instanceof EmbeddingModelChangedError) {
17512
+ await handleEmbeddingModelChange(error, service);
17513
+ } else {
17514
+ throw error;
17515
+ }
17516
+ }
17517
+ docService = service;
17518
+ }
17298
17519
  const pipelineOptions = {
17299
17520
  recoverJobs: false,
17300
17521
  // MCP command doesn't support job recovery
@@ -17972,7 +18193,7 @@ function createCli(argv) {
17972
18193
  let globalEventBus = null;
17973
18194
  let globalTelemetryService = null;
17974
18195
  const commandStartTimes = /* @__PURE__ */ new Map();
17975
- const cli = registerGlobalOutputOptions(yargs(hideBin(argv))).scriptName("docs-mcp-server").strict().usage("Usage: $0 <command> [options]").version("2.1.1").option("verbose", {
18196
+ const cli = registerGlobalOutputOptions(yargs(hideBin(argv))).scriptName("docs-mcp-server").strict().usage("Usage: $0 <command> [options]").version("2.2.1").option("verbose", {
17976
18197
  type: "boolean",
17977
18198
  description: "Enable verbose (debug) logging",
17978
18199
  default: false
@@ -18033,7 +18254,7 @@ function createCli(argv) {
18033
18254
  if (shouldEnableTelemetry() && telemetry.isEnabled()) {
18034
18255
  const commandName = argv2._[0]?.toString() || "default";
18035
18256
  telemetry.setGlobalContext({
18036
- appVersion: "2.1.1",
18257
+ appVersion: "2.2.1",
18037
18258
  appPlatform: process.platform,
18038
18259
  appNodeVersion: process.version,
18039
18260
  appInterface: "cli",
@@ -18179,4 +18400,4 @@ export {
18179
18400
  cleanupCliCommand,
18180
18401
  runCli
18181
18402
  };
18182
- //# sourceMappingURL=main-ntnRQ8Za.js.map
18403
+ //# sourceMappingURL=main-C6In2ps7.js.map