@arabold/docs-mcp-server 1.31.1 → 1.33.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.js CHANGED
@@ -1472,6 +1472,8 @@ const FETCHER_MAX_CACHE_ITEM_SIZE_BYTES = 500 * 1024;
1472
1472
  const SPLITTER_MIN_CHUNK_SIZE = 500;
1473
1473
  const SPLITTER_PREFERRED_CHUNK_SIZE = 1500;
1474
1474
  const SPLITTER_MAX_CHUNK_SIZE = 5e3;
1475
+ const JSON_MAX_NESTING_DEPTH = 5;
1476
+ const JSON_MAX_CHUNKS = 1e3;
1475
1477
  const EMBEDDING_BATCH_SIZE = 100;
1476
1478
  const EMBEDDING_BATCH_CHARS = 5e4;
1477
1479
  const MIGRATION_MAX_RETRIES = 5;
@@ -3044,16 +3046,203 @@ class GreedySplitter {
3044
3046
  return common;
3045
3047
  }
3046
3048
  }
3049
/**
 * Splits plain text into chunks of at most `options.chunkSize` characters while
 * trying to keep semantic units together: paragraphs first, then lines, and
 * finally a word-level split. Formatting inside chunks is preserved; trimming
 * is left to higher-level splitters.
 */
class TextContentSplitter {
  /**
   * @param {{ chunkSize: number }} options `chunkSize` is the maximum chunk
   *   length, measured with `String#length`.
   */
  constructor(options) {
    this.options = options;
  }

  /**
   * Splits `content` into chunks no longer than `chunkSize`.
   *
   * Strategy, in order: return the content unchanged if it already fits; use
   * paragraph chunks if every one fits; use merged line chunks if every line
   * fits; otherwise fall back to the word-level splitter and merge its output.
   *
   * @param {string} content Text to split.
   * @returns {Promise<string[]>} Chunks in document order.
   * @throws {MinimumChunkSizeError} If a single whitespace-delimited word is
   *   longer than `chunkSize`.
   */
  async split(content) {
    const { chunkSize } = this.options;
    if (content.length <= chunkSize) {
      return [content];
    }

    // A word that cannot fit in one chunk makes boundary-respecting splitting
    // impossible, so fail early and let the caller decide how to recover.
    let longestWordLength = 0;
    for (const word of content.split(/\s+/)) {
      longestWordLength = Math.max(longestWordLength, word.length);
    }
    if (longestWordLength > chunkSize) {
      throw new MinimumChunkSizeError(longestWordLength, chunkSize);
    }

    const paragraphChunks = this.splitByParagraphs(content);
    if (this.areChunksValid(paragraphChunks)) {
      return paragraphChunks;
    }

    const lineChunks = this.splitByLines(content);
    if (this.areChunksValid(lineChunks)) {
      // Each line still carries its own "\n", so no separator is re-inserted.
      return this.mergeChunks(lineChunks, "");
    }

    const wordChunks = await this.splitByWords(content);
    return this.mergeChunks(wordChunks, " ");
  }

  /**
   * Checks that every chunk is within the maximum size limit.
   *
   * @param {string[]} chunks
   * @returns {boolean}
   */
  areChunksValid(chunks) {
    return chunks.every((chunk) => chunk.length <= this.options.chunkSize);
  }

  /**
   * Splits text at paragraph boundaries (a newline, optional whitespace, and
   * another newline). Each chunk keeps its trailing separator.
   *
   * A chunk is dropped only when it is a bare separator of at most two
   * characters (for example a leading "\n\n"). Short chunks that contain real
   * content, such as a final "ok" or "}", are kept so no text is lost.
   *
   * @param {string} text
   * @returns {string[]}
   */
  splitByParagraphs(text) {
    const chunks = [];
    const keep = (chunk) => chunk.length > 2 || chunk.trim().length > 0;
    const paragraphRegex = /\n\s*\n/g;
    let startPos = 0;
    let match = paragraphRegex.exec(text);
    while (match !== null) {
      const endPos = match.index + match[0].length;
      const chunk = text.slice(startPos, endPos);
      if (keep(chunk)) {
        chunks.push(chunk);
      }
      startPos = endPos;
      match = paragraphRegex.exec(text);
    }
    if (startPos < text.length) {
      const remainingChunk = text.slice(startPos);
      if (keep(remainingChunk)) {
        chunks.push(remainingChunk);
      }
    }
    return chunks;
  }

  /**
   * Splits text after every "\n". Each chunk keeps its trailing newline; text
   * after the last newline becomes the final chunk.
   *
   * @param {string} text
   * @returns {string[]}
   */
  splitByLines(text) {
    const chunks = [];
    let startPos = 0;
    for (let i = 0; i < text.length; i++) {
      if (text[i] === "\n") {
        chunks.push(text.slice(startPos, i + 1));
        startPos = i + 1;
      }
    }
    if (startPos < text.length) {
      chunks.push(text.slice(startPos));
    }
    return chunks;
  }

  /**
   * Last-resort split that delegates to `RecursiveCharacterTextSplitter`
   * configured with this splitter's chunk size and no overlap.
   *
   * @param {string} text
   * @returns {Promise<string[]>}
   */
  async splitByWords(text) {
    const splitter = new RecursiveCharacterTextSplitter({
      chunkSize: this.options.chunkSize,
      chunkOverlap: 0
    });
    return splitter.splitText(text);
  }

  /**
   * Greedily merges consecutive chunks to reduce fragmentation. A chunk is
   * appended to the running chunk only while the combined length, including
   * the separator, stays within `chunkSize`.
   *
   * @param {string[]} chunks Chunks in document order.
   * @param {string} separator Text inserted between merged chunks.
   * @returns {string[]}
   */
  mergeChunks(chunks, separator) {
    const mergedChunks = [];
    let currentChunk = null;
    for (const chunk of chunks) {
      if (currentChunk === null) {
        currentChunk = chunk;
        continue;
      }
      const combinedSize =
        this.getChunkSize(currentChunk) + this.getChunkSize(chunk) + separator.length;
      if (combinedSize <= this.options.chunkSize) {
        currentChunk = `${currentChunk}${separator}${chunk}`;
      } else {
        mergedChunks.push(currentChunk);
        currentChunk = chunk;
      }
    }
    if (currentChunk) {
      mergedChunks.push(currentChunk);
    }
    return mergedChunks;
  }

  /**
   * @param {string} chunk
   * @returns {number} The chunk length used for size accounting.
   */
  getChunkSize(chunk) {
    return chunk.length;
  }

  /**
   * Returns the content unchanged; plain text needs no wrapper.
   *
   * @param {string} content
   * @returns {string}
   */
  wrap(content) {
    return content;
  }
}
3175
/**
 * Splits a plain-text document into chunk records of the form
 * `{ types: ["text"], content, section: { level: 0, path: [] } }`.
 *
 * Splitting is delegated to `TextContentSplitter`. If that throws, the content
 * is cut into fixed-size character slices so the caller always gets chunks
 * that respect the size limit.
 */
class TextDocumentSplitter {
  options;
  textSplitter;

  /**
   * @param {{ maxChunkSize?: number }} [options] `maxChunkSize` defaults to
   *   `SPLITTER_MAX_CHUNK_SIZE` when it is null or undefined.
   */
  constructor(options = {}) {
    this.options = {
      maxChunkSize: options.maxChunkSize ?? SPLITTER_MAX_CHUNK_SIZE
    };
    this.textSplitter = new TextContentSplitter({
      chunkSize: this.options.maxChunkSize
    });
  }

  /**
   * @param {string} content Document text.
   * @returns {Promise<Array<{types: string[], content: string, section: {level: number, path: string[]}}>>}
   *   Chunk records in document order; an empty array for blank content.
   */
  async splitText(content) {
    if (!content.trim()) {
      return [];
    }
    try {
      const chunks = await this.textSplitter.split(content);
      return chunks.map((chunk) => this.toTextChunk(chunk));
    } catch (error) {
      // MinimumChunkSizeError is the expected trigger for the fallback (a
      // single word longer than the limit); anything else is worth a warning.
      if (!(error instanceof MinimumChunkSizeError) && error instanceof Error) {
        console.warn(
          `Unexpected text splitting error: ${error.message}. Forcing character-based split.`
        );
      }
      return this.forceSplit(content).map((piece) => this.toTextChunk(piece));
    }
  }

  /**
   * Cuts `content` into consecutive slices of at most `maxChunkSize` UTF-16
   * code units, ignoring word boundaries.
   *
   * Two safeguards are applied:
   * - The step is at least 1, so a zero or negative `maxChunkSize` cannot stall
   *   the loop.
   * - A cut that would fall between a high and a low surrogate is moved back by
   *   one unit (when the slice has room), so no chunk ends on a broken
   *   character.
   *
   * @param {string} content
   * @returns {string[]}
   */
  forceSplit(content) {
    const { maxChunkSize } = this.options;
    const step = maxChunkSize > 0 ? maxChunkSize : 1;
    const pieces = [];
    let offset = 0;
    while (offset < content.length) {
      let end = Math.min(offset + step, content.length);
      if (end < content.length && end - offset > 1) {
        const last = content.charCodeAt(end - 1);
        const next = content.charCodeAt(end);
        const splitsPair =
          last >= 0xd800 && last <= 0xdbff && next >= 0xdc00 && next <= 0xdfff;
        if (splitsPair) {
          end -= 1;
        }
      }
      pieces.push(content.substring(offset, end));
      offset = end;
    }
    return pieces;
  }

  /**
   * Wraps a piece of text in the chunk record shape used by this splitter.
   *
   * @param {string} content
   */
  toTextChunk(content) {
    return {
      types: ["text"],
      content,
      section: {
        level: 0,
        path: []
      }
    };
  }
}
3047
3227
  class JsonDocumentSplitter {
3048
3228
  preserveFormatting;
3229
+ maxDepth;
3230
+ maxChunks;
3231
+ textFallbackSplitter;
3049
3232
  constructor(options = {}) {
3050
3233
  this.preserveFormatting = options.preserveFormatting ?? true;
3234
+ this.maxDepth = options.maxDepth ?? JSON_MAX_NESTING_DEPTH;
3235
+ this.maxChunks = options.maxChunks ?? JSON_MAX_CHUNKS;
3236
+ this.textFallbackSplitter = new TextDocumentSplitter();
3051
3237
  }
3052
3238
  async splitText(content, _contentType) {
3053
3239
  try {
3054
3240
  const parsed = JSON.parse(content);
3055
3241
  const chunks = [];
3056
- this.processValue(parsed, ["root"], 1, 0, chunks, true);
3242
+ await this.processValue(parsed, ["root"], 1, 0, chunks, true);
3243
+ if (chunks.length > this.maxChunks) {
3244
+ return this.textFallbackSplitter.splitText(content);
3245
+ }
3057
3246
  return chunks;
3058
3247
  } catch {
3059
3248
  return [
@@ -3068,16 +3257,20 @@ class JsonDocumentSplitter {
3068
3257
  ];
3069
3258
  }
3070
3259
  }
3071
- processValue(value, path2, level, indentLevel, chunks, isLastItem) {
3260
+ async processValue(value, path2, level, indentLevel, chunks, isLastItem) {
3261
+ if (level > this.maxDepth) {
3262
+ await this.processValueAsText(value, path2, level, indentLevel, chunks, isLastItem);
3263
+ return;
3264
+ }
3072
3265
  if (Array.isArray(value)) {
3073
- this.processArray(value, path2, level, indentLevel, chunks, isLastItem);
3266
+ await this.processArray(value, path2, level, indentLevel, chunks, isLastItem);
3074
3267
  } else if (value !== null && typeof value === "object") {
3075
- this.processObject(value, path2, level, indentLevel, chunks, isLastItem);
3268
+ await this.processObject(value, path2, level, indentLevel, chunks, isLastItem);
3076
3269
  } else {
3077
- this.processPrimitive(value, path2, level, indentLevel, chunks, isLastItem);
3270
+ await this.processPrimitive(value, path2, level, indentLevel, chunks, isLastItem);
3078
3271
  }
3079
3272
  }
3080
- processArray(array, path2, level, indentLevel, chunks, isLastItem) {
3273
+ async processArray(array, path2, level, indentLevel, chunks, isLastItem) {
3081
3274
  const indent = this.getIndent(indentLevel);
3082
3275
  const comma = isLastItem ? "" : ",";
3083
3276
  chunks.push({
@@ -3085,18 +3278,19 @@ class JsonDocumentSplitter {
3085
3278
  content: `${indent}[`,
3086
3279
  section: { level, path: [...path2] }
3087
3280
  });
3088
- array.forEach((item, index) => {
3281
+ for (let index = 0; index < array.length; index++) {
3282
+ const item = array[index];
3089
3283
  const isLast = index === array.length - 1;
3090
3284
  const itemPath = [...path2, `[${index}]`];
3091
- this.processValue(item, itemPath, level + 1, indentLevel + 1, chunks, isLast);
3092
- });
3285
+ await this.processValue(item, itemPath, level + 1, indentLevel + 1, chunks, isLast);
3286
+ }
3093
3287
  chunks.push({
3094
3288
  types: ["code"],
3095
3289
  content: `${indent}]${comma}`,
3096
3290
  section: { level, path: [...path2] }
3097
3291
  });
3098
3292
  }
3099
- processObject(obj, path2, level, indentLevel, chunks, isLastItem) {
3293
+ async processObject(obj, path2, level, indentLevel, chunks, isLastItem) {
3100
3294
  const indent = this.getIndent(indentLevel);
3101
3295
  const comma = isLastItem ? "" : ",";
3102
3296
  const entries = Object.entries(obj);
@@ -3105,10 +3299,11 @@ class JsonDocumentSplitter {
3105
3299
  content: `${indent}{`,
3106
3300
  section: { level, path: [...path2] }
3107
3301
  });
3108
- entries.forEach(([key, value], index) => {
3302
+ for (let index = 0; index < entries.length; index++) {
3303
+ const [key, value] = entries[index];
3109
3304
  const isLast = index === entries.length - 1;
3110
3305
  const propertyPath = [...path2, key];
3111
- this.processProperty(
3306
+ await this.processProperty(
3112
3307
  key,
3113
3308
  value,
3114
3309
  propertyPath,
@@ -3117,14 +3312,14 @@ class JsonDocumentSplitter {
3117
3312
  chunks,
3118
3313
  isLast
3119
3314
  );
3120
- });
3315
+ }
3121
3316
  chunks.push({
3122
3317
  types: ["code"],
3123
3318
  content: `${indent}}${comma}`,
3124
3319
  section: { level, path: [...path2] }
3125
3320
  });
3126
3321
  }
3127
- processProperty(key, value, path2, level, indentLevel, chunks, isLastProperty) {
3322
+ async processProperty(key, value, path2, level, indentLevel, chunks, isLastProperty) {
3128
3323
  const indent = this.getIndent(indentLevel);
3129
3324
  if (typeof value === "object" && value !== null) {
3130
3325
  chunks.push({
@@ -3132,30 +3327,98 @@ class JsonDocumentSplitter {
3132
3327
  content: `${indent}"${key}": `,
3133
3328
  section: { level, path: path2 }
3134
3329
  });
3135
- this.processValue(value, path2, level, indentLevel, chunks, isLastProperty);
3330
+ await this.processValue(value, path2, level, indentLevel, chunks, isLastProperty);
3136
3331
  } else {
3137
3332
  const comma = isLastProperty ? "" : ",";
3138
3333
  const formattedValue = JSON.stringify(value);
3139
- chunks.push({
3140
- types: ["code"],
3141
- content: `${indent}"${key}": ${formattedValue}${comma}`,
3142
- section: { level, path: path2 }
3143
- });
3334
+ const fullContent = `${indent}"${key}": ${formattedValue}${comma}`;
3335
+ if (fullContent.length > SPLITTER_MAX_CHUNK_SIZE) {
3336
+ const textChunks = await this.textFallbackSplitter.splitText(formattedValue);
3337
+ chunks.push({
3338
+ types: ["code"],
3339
+ content: `${indent}"${key}": `,
3340
+ section: { level, path: path2 }
3341
+ });
3342
+ textChunks.forEach((textChunk, index) => {
3343
+ const isLastChunk = index === textChunks.length - 1;
3344
+ const content = `${textChunk.content}${isLastChunk ? comma : ""}`;
3345
+ chunks.push({
3346
+ types: ["code"],
3347
+ content,
3348
+ section: { level, path: path2 }
3349
+ });
3350
+ });
3351
+ } else {
3352
+ chunks.push({
3353
+ types: ["code"],
3354
+ content: fullContent,
3355
+ section: { level, path: path2 }
3356
+ });
3357
+ }
3144
3358
  }
3145
3359
  }
3146
- processPrimitive(value, path2, level, indentLevel, chunks, isLastItem) {
3360
+ async processPrimitive(value, path2, level, indentLevel, chunks, isLastItem) {
3147
3361
  const indent = this.getIndent(indentLevel);
3148
3362
  const comma = isLastItem ? "" : ",";
3149
3363
  const formattedValue = JSON.stringify(value);
3150
- chunks.push({
3151
- types: ["code"],
3152
- content: `${indent}${formattedValue}${comma}`,
3153
- section: { level, path: path2 }
3154
- });
3364
+ const fullContent = `${indent}${formattedValue}${comma}`;
3365
+ if (fullContent.length > SPLITTER_MAX_CHUNK_SIZE) {
3366
+ const textChunks = await this.textFallbackSplitter.splitText(formattedValue);
3367
+ textChunks.forEach((textChunk, index) => {
3368
+ const isFirstChunk = index === 0;
3369
+ const isLastChunk = index === textChunks.length - 1;
3370
+ const valueContent = isFirstChunk ? `${indent}${textChunk.content}` : textChunk.content;
3371
+ const content = `${valueContent}${isLastChunk ? comma : ""}`;
3372
+ chunks.push({
3373
+ types: ["code"],
3374
+ content,
3375
+ section: { level, path: [...path2] }
3376
+ });
3377
+ });
3378
+ } else {
3379
+ chunks.push({
3380
+ types: ["code"],
3381
+ content: fullContent,
3382
+ section: { level, path: path2 }
3383
+ });
3384
+ }
3155
3385
  }
3156
3386
  /**
   * Builds the leading indentation for a chunk.
   * Returns the indent unit string repeated `level` times when
   * `preserveFormatting` is enabled, and an empty string otherwise.
   * NOTE(review): the width of the indent unit literal below may have been
   * altered by whitespace collapsing in this rendering - confirm against the
   * published file.
   */
  getIndent(level) {
3157
3387
  return this.preserveFormatting ? " ".repeat(level) : "";
3158
3388
  }
3389
+ /**
3390
+ * Process a value that has exceeded the maximum depth limit by serializing it as text.
3391
+ * This prevents excessive chunking of deeply nested structures.
3392
+ * If the serialized value is too large, splits it using the text fallback splitter.
3393
+ */
3394
+ async processValueAsText(value, path2, level, indentLevel, chunks, isLastItem) {
3395
+ const indent = this.getIndent(indentLevel);
3396
+ const comma = isLastItem ? "" : ",";
3397
+ let serialized;
3398
+ if (this.preserveFormatting) {
3399
+ const lines = JSON.stringify(value, null, 2).split("\n");
3400
+ serialized = lines.map((line, idx) => idx === 0 ? line : `${indent}${line}`).join("\n");
3401
+ } else {
3402
+ serialized = JSON.stringify(value);
3403
+ }
3404
+ const fullContent = `${indent}${serialized}${comma}`;
3405
+ if (fullContent.length > SPLITTER_MAX_CHUNK_SIZE) {
3406
+ const textChunks = await this.textFallbackSplitter.splitText(serialized);
3407
+ for (const textChunk of textChunks) {
3408
+ chunks.push({
3409
+ types: ["code"],
3410
+ content: textChunk.content,
3411
+ section: { level, path: [...path2] }
3412
+ });
3413
+ }
3414
+ } else {
3415
+ chunks.push({
3416
+ types: ["code"],
3417
+ content: fullContent,
3418
+ section: { level, path: [...path2] }
3419
+ });
3420
+ }
3421
+ }
3159
3422
  }
3160
3423
  class CodeContentSplitter {
3161
3424
  constructor(options) {
@@ -3255,132 +3518,6 @@ class TableContentSplitter {
3255
3518
  return separator.includes("|") && /^\|?[\s-|]+\|?$/.test(separator);
3256
3519
  }
3257
3520
  }
3258
- class TextContentSplitter {
3259
- constructor(options) {
3260
- this.options = options;
3261
- }
3262
- /**
3263
- * Splits text content into chunks while trying to preserve semantic boundaries.
3264
- * Prefers paragraph breaks, then line breaks, finally falling back to word boundaries.
3265
- * Always preserves formatting - trimming should be done by higher-level splitters if needed.
3266
- */
3267
- async split(content) {
3268
- if (content.length <= this.options.chunkSize) {
3269
- return [content];
3270
- }
3271
- const words = content.split(/\s+/);
3272
- const longestWord = words.reduce(
3273
- (max, word) => word.length > max.length ? word : max
3274
- );
3275
- if (longestWord.length > this.options.chunkSize) {
3276
- throw new MinimumChunkSizeError(longestWord.length, this.options.chunkSize);
3277
- }
3278
- const paragraphChunks = this.splitByParagraphs(content);
3279
- if (this.areChunksValid(paragraphChunks)) {
3280
- return paragraphChunks;
3281
- }
3282
- const lineChunks = this.splitByLines(content);
3283
- if (this.areChunksValid(lineChunks)) {
3284
- return this.mergeChunks(lineChunks, "");
3285
- }
3286
- const wordChunks = await this.splitByWords(content);
3287
- return this.mergeChunks(wordChunks, " ");
3288
- }
3289
- /**
3290
- * Checks if all chunks are within the maximum size limit
3291
- */
3292
- areChunksValid(chunks) {
3293
- return chunks.every((chunk) => chunk.length <= this.options.chunkSize);
3294
- }
3295
- /**
3296
- * Splits text into chunks by paragraph boundaries (double newlines)
3297
- * Preserves all formatting and whitespace including the paragraph separators
3298
- */
3299
- splitByParagraphs(text) {
3300
- const chunks = [];
3301
- let startPos = 0;
3302
- const paragraphRegex = /\n\s*\n/g;
3303
- let match = paragraphRegex.exec(text);
3304
- while (match !== null) {
3305
- const endPos = match.index + match[0].length;
3306
- const chunk = text.slice(startPos, endPos);
3307
- if (chunk.length > 2) {
3308
- chunks.push(chunk);
3309
- }
3310
- startPos = endPos;
3311
- match = paragraphRegex.exec(text);
3312
- }
3313
- if (startPos < text.length) {
3314
- const remainingChunk = text.slice(startPos);
3315
- if (remainingChunk.length > 2) {
3316
- chunks.push(remainingChunk);
3317
- }
3318
- }
3319
- return chunks.filter(Boolean);
3320
- }
3321
- /**
3322
- * Splits text into chunks by line boundaries
3323
- * Preserves all formatting and whitespace, including newlines at the end of each line
3324
- */
3325
- splitByLines(text) {
3326
- const chunks = [];
3327
- let startPos = 0;
3328
- for (let i = 0; i < text.length; i++) {
3329
- if (text[i] === "\n") {
3330
- const chunk = text.slice(startPos, i + 1);
3331
- chunks.push(chunk);
3332
- startPos = i + 1;
3333
- }
3334
- }
3335
- if (startPos < text.length) {
3336
- chunks.push(text.slice(startPos));
3337
- }
3338
- return chunks;
3339
- }
3340
- /**
3341
- * Uses LangChain's recursive splitter for word-based splitting as a last resort
3342
- */
3343
- async splitByWords(text) {
3344
- const splitter = new RecursiveCharacterTextSplitter({
3345
- chunkSize: this.options.chunkSize,
3346
- chunkOverlap: 0
3347
- });
3348
- const chunks = await splitter.splitText(text);
3349
- return chunks;
3350
- }
3351
- /**
3352
- * Attempts to merge small chunks with previous chunks to minimize fragmentation.
3353
- * Only merges if combined size is within maxChunkSize.
3354
- */
3355
- mergeChunks(chunks, separator) {
3356
- const mergedChunks = [];
3357
- let currentChunk = null;
3358
- for (const chunk of chunks) {
3359
- if (currentChunk === null) {
3360
- currentChunk = chunk;
3361
- continue;
3362
- }
3363
- const currentChunkSize = this.getChunkSize(currentChunk);
3364
- const nextChunkSize = this.getChunkSize(chunk);
3365
- if (currentChunkSize + nextChunkSize + separator.length <= this.options.chunkSize) {
3366
- currentChunk = `${currentChunk}${separator}${chunk}`;
3367
- } else {
3368
- mergedChunks.push(currentChunk);
3369
- currentChunk = chunk;
3370
- }
3371
- }
3372
- if (currentChunk) {
3373
- mergedChunks.push(currentChunk);
3374
- }
3375
- return mergedChunks;
3376
- }
3377
- getChunkSize(chunk) {
3378
- return chunk.length;
3379
- }
3380
- wrap(content) {
3381
- return content;
3382
- }
3383
- }
3384
3521
  class SemanticMarkdownSplitter {
3385
3522
  constructor(preferredChunkSize, maxChunkSize) {
3386
3523
  this.preferredChunkSize = preferredChunkSize;
@@ -6452,45 +6589,6 @@ class MarkdownPipeline extends BasePipeline {
6452
6589
  };
6453
6590
  }
6454
6591
  }
6455
- class TextDocumentSplitter {
6456
- options;
6457
- textSplitter;
6458
- constructor(options = {}) {
6459
- this.options = {
6460
- maxChunkSize: options.maxChunkSize ?? SPLITTER_MAX_CHUNK_SIZE
6461
- };
6462
- this.textSplitter = new TextContentSplitter({
6463
- chunkSize: this.options.maxChunkSize
6464
- });
6465
- }
6466
- async splitText(content) {
6467
- if (!content.trim()) {
6468
- return [];
6469
- }
6470
- try {
6471
- const chunks = await this.textSplitter.split(content);
6472
- return chunks.map((chunk) => ({
6473
- types: ["text"],
6474
- content: chunk,
6475
- section: {
6476
- level: 0,
6477
- path: []
6478
- }
6479
- }));
6480
- } catch {
6481
- return [
6482
- {
6483
- types: ["text"],
6484
- content,
6485
- section: {
6486
- level: 0,
6487
- path: []
6488
- }
6489
- }
6490
- ];
6491
- }
6492
- }
6493
- }
6494
6592
  class TextPipeline extends BasePipeline {
6495
6593
  middleware;
6496
6594
  splitter;
@@ -8973,9 +9071,10 @@ class DocumentStore {
8973
9071
  * - Single texts that are too large are truncated and retried once
8974
9072
  *
8975
9073
  * @param texts Array of texts to embed
9074
+ * @param isRetry Internal flag to prevent duplicate warning logs
8976
9075
  * @returns Array of embedding vectors
8977
9076
  */
8978
- async embedDocumentsWithRetry(texts) {
9077
+ async embedDocumentsWithRetry(texts, isRetry = false) {
8979
9078
  if (texts.length === 0) {
8980
9079
  return [];
8981
9080
  }
@@ -8987,26 +9086,27 @@ class DocumentStore {
8987
9086
  const midpoint = Math.floor(texts.length / 2);
8988
9087
  const firstHalf = texts.slice(0, midpoint);
8989
9088
  const secondHalf = texts.slice(midpoint);
8990
- logger.warn(
8991
- `⚠️ Batch of ${texts.length} texts exceeded size limit, splitting into ${firstHalf.length} + ${secondHalf.length}`
8992
- );
9089
+ if (!isRetry) {
9090
+ logger.warn(
9091
+ `⚠️ Batch of ${texts.length} texts exceeded size limit, splitting into ${firstHalf.length} + ${secondHalf.length}`
9092
+ );
9093
+ }
8993
9094
  const [firstEmbeddings, secondEmbeddings] = await Promise.all([
8994
- this.embedDocumentsWithRetry(firstHalf),
8995
- this.embedDocumentsWithRetry(secondHalf)
9095
+ this.embedDocumentsWithRetry(firstHalf, true),
9096
+ this.embedDocumentsWithRetry(secondHalf, true)
8996
9097
  ]);
8997
9098
  return [...firstEmbeddings, ...secondEmbeddings];
8998
9099
  } else {
8999
9100
  const text = texts[0];
9000
9101
  const midpoint = Math.floor(text.length / 2);
9001
9102
  const firstHalf = text.substring(0, midpoint);
9002
- logger.warn(
9003
- `⚠️ Single text exceeded embedding size limit (${text.length} chars). Truncating at ${firstHalf.length} chars.`
9004
- );
9005
- try {
9006
- const embedding = await this.embedDocumentsWithRetry([firstHalf]);
9007
- logger.info(
9008
- `✓ Using embedding from first half of split text (${firstHalf.length} chars)`
9103
+ if (!isRetry) {
9104
+ logger.warn(
9105
+ `⚠️ Single text exceeded embedding size limit (${text.length} chars).`
9009
9106
  );
9107
+ }
9108
+ try {
9109
+ const embedding = await this.embedDocumentsWithRetry([firstHalf], true);
9010
9110
  return embedding;
9011
9111
  } catch (retryError) {
9012
9112
  logger.error(
@@ -9130,8 +9230,8 @@ class DocumentStore {
9130
9230
  const rowId = result2.lastInsertRowid;
9131
9231
  if (this.isVectorSearchEnabled && paddedEmbeddings.length > 0) {
9132
9232
  this.statements.insertEmbedding.run(
9133
- BigInt(rowId),
9134
- JSON.stringify(paddedEmbeddings[docIndex])
9233
+ JSON.stringify(paddedEmbeddings[docIndex]),
9234
+ BigInt(rowId)
9135
9235
  );
9136
9236
  }
9137
9237
  docIndex++;
@@ -10056,6 +10156,8 @@ async function registerMcpService(server, docService, pipeline, readOnly = false
10056
10156
  const mcpServer = createMcpServerInstance(mcpTools, readOnly);
10057
10157
  const authMiddleware = authManager ? createAuthMiddleware(authManager) : null;
10058
10158
  const sseTransports = {};
10159
+ const heartbeatIntervals = {};
10160
+ const HEARTBEAT_INTERVAL_MS = 3e4;
10059
10161
  server.route({
10060
10162
  method: "GET",
10061
10163
  url: "/sse",
@@ -10067,12 +10169,31 @@ async function registerMcpService(server, docService, pipeline, readOnly = false
10067
10169
  if (telemetry.isEnabled()) {
10068
10170
  logger.info(`🔗 MCP client connected: ${transport.sessionId}`);
10069
10171
  }
10070
- reply.raw.on("close", () => {
10172
+ const heartbeatInterval = setInterval(() => {
10173
+ try {
10174
+ reply.raw.write(": heartbeat\n\n");
10175
+ } catch {
10176
+ clearInterval(heartbeatInterval);
10177
+ delete heartbeatIntervals[transport.sessionId];
10178
+ }
10179
+ }, HEARTBEAT_INTERVAL_MS);
10180
+ heartbeatIntervals[transport.sessionId] = heartbeatInterval;
10181
+ const cleanupConnection = () => {
10182
+ const interval = heartbeatIntervals[transport.sessionId];
10183
+ if (interval) {
10184
+ clearInterval(interval);
10185
+ delete heartbeatIntervals[transport.sessionId];
10186
+ }
10071
10187
  delete sseTransports[transport.sessionId];
10072
10188
  transport.close();
10073
10189
  if (telemetry.isEnabled()) {
10074
10190
  logger.info(`🔗 MCP client disconnected: ${transport.sessionId}`);
10075
10191
  }
10192
+ };
10193
+ reply.raw.on("close", cleanupConnection);
10194
+ reply.raw.on("error", (error) => {
10195
+ logger.debug(`SSE connection error: ${error}`);
10196
+ cleanupConnection();
10076
10197
  });
10077
10198
  await mcpServer.connect(transport);
10078
10199
  } catch (error) {
@@ -10114,10 +10235,15 @@ async function registerMcpService(server, docService, pipeline, readOnly = false
10114
10235
  const requestTransport = new StreamableHTTPServerTransport({
10115
10236
  sessionIdGenerator: void 0
10116
10237
  });
10117
- reply.raw.on("close", () => {
10238
+ const cleanupRequest = () => {
10118
10239
  logger.debug("Streamable HTTP request closed");
10119
10240
  requestTransport.close();
10120
10241
  requestServer.close();
10242
+ };
10243
+ reply.raw.on("close", cleanupRequest);
10244
+ reply.raw.on("error", (error) => {
10245
+ logger.debug(`Streamable HTTP connection error: ${error}`);
10246
+ cleanupRequest();
10121
10247
  });
10122
10248
  await requestServer.connect(requestTransport);
10123
10249
  await requestTransport.handleRequest(request.raw, reply.raw, request.body);
@@ -10130,10 +10256,17 @@ async function registerMcpService(server, docService, pipeline, readOnly = false
10130
10256
  }
10131
10257
  });
10132
10258
  mcpServer._sseTransports = sseTransports;
10259
+ mcpServer._heartbeatIntervals = heartbeatIntervals;
10133
10260
  return mcpServer;
10134
10261
  }
10135
10262
  async function cleanupMcpService(mcpServer) {
10136
10263
  try {
10264
+ const heartbeatIntervals = mcpServer._heartbeatIntervals;
10265
+ if (heartbeatIntervals) {
10266
+ for (const interval of Object.values(heartbeatIntervals)) {
10267
+ clearInterval(interval);
10268
+ }
10269
+ }
10137
10270
  const sseTransports = mcpServer._sseTransports;
10138
10271
  if (sseTransports) {
10139
10272
  for (const transport of Object.values(sseTransports)) {
@@ -10737,7 +10870,7 @@ const Layout = ({
10737
10870
  children,
10738
10871
  eventClientConfig
10739
10872
  }) => {
10740
- const versionString = version || "1.31.1";
10873
+ const versionString = version || "1.33.0";
10741
10874
  const versionInitializer = `versionUpdate({ currentVersion: ${`'${versionString}'`} })`;
10742
10875
  return /* @__PURE__ */ jsxs("html", { lang: "en", children: [
10743
10876
  /* @__PURE__ */ jsxs("head", { children: [
@@ -13049,7 +13182,7 @@ class AppServer {
13049
13182
  try {
13050
13183
  if (telemetry.isEnabled()) {
13051
13184
  telemetry.setGlobalContext({
13052
- appVersion: "1.31.1",
13185
+ appVersion: "1.33.0",
13053
13186
  appPlatform: process.platform,
13054
13187
  appNodeVersion: process.version,
13055
13188
  appServicesEnabled: this.getActiveServicesList(),
@@ -16536,7 +16669,7 @@ function createCliProgram() {
16536
16669
  const commandStartTimes = /* @__PURE__ */ new Map();
16537
16670
  let globalEventBus = null;
16538
16671
  let globalTelemetryService = null;
16539
- program.name("docs-mcp-server").description("Unified CLI, MCP Server, and Web Interface for Docs MCP Server.").version("1.31.1").addOption(
16672
+ program.name("docs-mcp-server").description("Unified CLI, MCP Server, and Web Interface for Docs MCP Server.").version("1.33.0").addOption(
16540
16673
  new Option("--verbose", "Enable verbose (debug) logging").conflicts("silent")
16541
16674
  ).addOption(new Option("--silent", "Disable all logging except errors")).addOption(
16542
16675
  new Option("--telemetry", "Enable telemetry collection").env("DOCS_MCP_TELEMETRY").argParser((value) => {
@@ -16570,7 +16703,7 @@ function createCliProgram() {
16570
16703
  if (shouldEnableTelemetry()) {
16571
16704
  if (telemetry.isEnabled()) {
16572
16705
  telemetry.setGlobalContext({
16573
- appVersion: "1.31.1",
16706
+ appVersion: "1.33.0",
16574
16707
  appPlatform: process.platform,
16575
16708
  appNodeVersion: process.version,
16576
16709
  appInterface: "cli",