@arabold/docs-mcp-server 1.32.0 → 1.33.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.js CHANGED
@@ -1472,6 +1472,8 @@ const FETCHER_MAX_CACHE_ITEM_SIZE_BYTES = 500 * 1024;
1472
1472
  const SPLITTER_MIN_CHUNK_SIZE = 500;
1473
1473
  const SPLITTER_PREFERRED_CHUNK_SIZE = 1500;
1474
1474
  const SPLITTER_MAX_CHUNK_SIZE = 5e3;
1475
+ const JSON_MAX_NESTING_DEPTH = 5;
1476
+ const JSON_MAX_CHUNKS = 1e3;
1475
1477
  const EMBEDDING_BATCH_SIZE = 100;
1476
1478
  const EMBEDDING_BATCH_CHARS = 5e4;
1477
1479
  const MIGRATION_MAX_RETRIES = 5;
@@ -2355,19 +2357,17 @@ class BrowserFetcher {
2355
2357
  );
2356
2358
  }
2357
2359
  }
2360
+ static async launchBrowser() {
2361
+ return chromium.launch({
2362
+ headless: true,
2363
+ executablePath: process.env.PLAYWRIGHT_CHROMIUM_EXECUTABLE_PATH || void 0,
2364
+ args: ["--no-sandbox"]
2365
+ });
2366
+ }
2358
2367
  async ensureBrowserReady() {
2359
2368
  if (!this.browser) {
2360
2369
  logger.debug("Launching browser...");
2361
- this.browser = await chromium.launch({
2362
- headless: true,
2363
- args: [
2364
- "--no-sandbox",
2365
- "--disable-setuid-sandbox",
2366
- "--disable-dev-shm-usage",
2367
- "--disable-web-security",
2368
- "--disable-features=site-per-process"
2369
- ]
2370
- });
2370
+ this.browser = await BrowserFetcher.launchBrowser();
2371
2371
  }
2372
2372
  if (!this.page) {
2373
2373
  this.page = await this.browser.newPage();
@@ -3044,16 +3044,203 @@ class GreedySplitter {
3044
3044
  return common;
3045
3045
  }
3046
3046
  }
3047
+ class TextContentSplitter {
3048
+ constructor(options) {
3049
+ this.options = options;
3050
+ }
3051
+ /**
3052
+ * Splits text content into chunks while trying to preserve semantic boundaries.
3053
+ * Prefers paragraph breaks, then line breaks, finally falling back to word boundaries.
3054
+ * Always preserves formatting - trimming should be done by higher-level splitters if needed.
3055
+ */
3056
+ async split(content) {
3057
+ if (content.length <= this.options.chunkSize) {
3058
+ return [content];
3059
+ }
3060
+ const words = content.split(/\s+/);
3061
+ const longestWord = words.reduce(
3062
+ (max, word) => word.length > max.length ? word : max
3063
+ );
3064
+ if (longestWord.length > this.options.chunkSize) {
3065
+ throw new MinimumChunkSizeError(longestWord.length, this.options.chunkSize);
3066
+ }
3067
+ const paragraphChunks = this.splitByParagraphs(content);
3068
+ if (this.areChunksValid(paragraphChunks)) {
3069
+ return paragraphChunks;
3070
+ }
3071
+ const lineChunks = this.splitByLines(content);
3072
+ if (this.areChunksValid(lineChunks)) {
3073
+ return this.mergeChunks(lineChunks, "");
3074
+ }
3075
+ const wordChunks = await this.splitByWords(content);
3076
+ return this.mergeChunks(wordChunks, " ");
3077
+ }
3078
+ /**
3079
+ * Checks if all chunks are within the maximum size limit
3080
+ */
3081
+ areChunksValid(chunks) {
3082
+ return chunks.every((chunk) => chunk.length <= this.options.chunkSize);
3083
+ }
3084
+ /**
3085
+ * Splits text into chunks by paragraph boundaries (double newlines)
3086
+ * Preserves all formatting and whitespace including the paragraph separators
3087
+ */
3088
+ splitByParagraphs(text) {
3089
+ const chunks = [];
3090
+ let startPos = 0;
3091
+ const paragraphRegex = /\n\s*\n/g;
3092
+ let match = paragraphRegex.exec(text);
3093
+ while (match !== null) {
3094
+ const endPos = match.index + match[0].length;
3095
+ const chunk = text.slice(startPos, endPos);
3096
+ if (chunk.length > 2) {
3097
+ chunks.push(chunk);
3098
+ }
3099
+ startPos = endPos;
3100
+ match = paragraphRegex.exec(text);
3101
+ }
3102
+ if (startPos < text.length) {
3103
+ const remainingChunk = text.slice(startPos);
3104
+ if (remainingChunk.length > 2) {
3105
+ chunks.push(remainingChunk);
3106
+ }
3107
+ }
3108
+ return chunks.filter(Boolean);
3109
+ }
3110
+ /**
3111
+ * Splits text into chunks by line boundaries
3112
+ * Preserves all formatting and whitespace, including newlines at the end of each line
3113
+ */
3114
+ splitByLines(text) {
3115
+ const chunks = [];
3116
+ let startPos = 0;
3117
+ for (let i = 0; i < text.length; i++) {
3118
+ if (text[i] === "\n") {
3119
+ const chunk = text.slice(startPos, i + 1);
3120
+ chunks.push(chunk);
3121
+ startPos = i + 1;
3122
+ }
3123
+ }
3124
+ if (startPos < text.length) {
3125
+ chunks.push(text.slice(startPos));
3126
+ }
3127
+ return chunks;
3128
+ }
3129
+ /**
3130
+ * Uses LangChain's recursive splitter for word-based splitting as a last resort
3131
+ */
3132
+ async splitByWords(text) {
3133
+ const splitter = new RecursiveCharacterTextSplitter({
3134
+ chunkSize: this.options.chunkSize,
3135
+ chunkOverlap: 0
3136
+ });
3137
+ const chunks = await splitter.splitText(text);
3138
+ return chunks;
3139
+ }
3140
+ /**
3141
+ * Attempts to merge small chunks with previous chunks to minimize fragmentation.
3142
+ * Only merges if combined size is within maxChunkSize.
3143
+ */
3144
+ mergeChunks(chunks, separator) {
3145
+ const mergedChunks = [];
3146
+ let currentChunk = null;
3147
+ for (const chunk of chunks) {
3148
+ if (currentChunk === null) {
3149
+ currentChunk = chunk;
3150
+ continue;
3151
+ }
3152
+ const currentChunkSize = this.getChunkSize(currentChunk);
3153
+ const nextChunkSize = this.getChunkSize(chunk);
3154
+ if (currentChunkSize + nextChunkSize + separator.length <= this.options.chunkSize) {
3155
+ currentChunk = `${currentChunk}${separator}${chunk}`;
3156
+ } else {
3157
+ mergedChunks.push(currentChunk);
3158
+ currentChunk = chunk;
3159
+ }
3160
+ }
3161
+ if (currentChunk) {
3162
+ mergedChunks.push(currentChunk);
3163
+ }
3164
+ return mergedChunks;
3165
+ }
3166
+ getChunkSize(chunk) {
3167
+ return chunk.length;
3168
+ }
3169
+ wrap(content) {
3170
+ return content;
3171
+ }
3172
+ }
3173
+ class TextDocumentSplitter {
3174
+ options;
3175
+ textSplitter;
3176
+ constructor(options = {}) {
3177
+ this.options = {
3178
+ maxChunkSize: options.maxChunkSize ?? SPLITTER_MAX_CHUNK_SIZE
3179
+ };
3180
+ this.textSplitter = new TextContentSplitter({
3181
+ chunkSize: this.options.maxChunkSize
3182
+ });
3183
+ }
3184
+ async splitText(content) {
3185
+ if (!content.trim()) {
3186
+ return [];
3187
+ }
3188
+ try {
3189
+ const chunks = await this.textSplitter.split(content);
3190
+ return chunks.map((chunk) => ({
3191
+ types: ["text"],
3192
+ content: chunk,
3193
+ section: {
3194
+ level: 0,
3195
+ path: []
3196
+ }
3197
+ }));
3198
+ } catch (error) {
3199
+ if (!(error instanceof MinimumChunkSizeError) && error instanceof Error) {
3200
+ console.warn(
3201
+ `Unexpected text splitting error: ${error.message}. Forcing character-based split.`
3202
+ );
3203
+ }
3204
+ const chunks = [];
3205
+ let offset = 0;
3206
+ while (offset < content.length) {
3207
+ const chunkContent = content.substring(
3208
+ offset,
3209
+ offset + this.options.maxChunkSize
3210
+ );
3211
+ chunks.push({
3212
+ types: ["text"],
3213
+ content: chunkContent,
3214
+ section: {
3215
+ level: 0,
3216
+ path: []
3217
+ }
3218
+ });
3219
+ offset += this.options.maxChunkSize;
3220
+ }
3221
+ return chunks;
3222
+ }
3223
+ }
3224
+ }
3047
3225
  class JsonDocumentSplitter {
3048
3226
  preserveFormatting;
3227
+ maxDepth;
3228
+ maxChunks;
3229
+ textFallbackSplitter;
3049
3230
  constructor(options = {}) {
3050
3231
  this.preserveFormatting = options.preserveFormatting ?? true;
3232
+ this.maxDepth = options.maxDepth ?? JSON_MAX_NESTING_DEPTH;
3233
+ this.maxChunks = options.maxChunks ?? JSON_MAX_CHUNKS;
3234
+ this.textFallbackSplitter = new TextDocumentSplitter();
3051
3235
  }
3052
3236
  async splitText(content, _contentType) {
3053
3237
  try {
3054
3238
  const parsed = JSON.parse(content);
3055
3239
  const chunks = [];
3056
- this.processValue(parsed, ["root"], 1, 0, chunks, true);
3240
+ await this.processValue(parsed, ["root"], 1, 0, chunks, true);
3241
+ if (chunks.length > this.maxChunks) {
3242
+ return this.textFallbackSplitter.splitText(content);
3243
+ }
3057
3244
  return chunks;
3058
3245
  } catch {
3059
3246
  return [
@@ -3068,16 +3255,20 @@ class JsonDocumentSplitter {
3068
3255
  ];
3069
3256
  }
3070
3257
  }
3071
- processValue(value, path2, level, indentLevel, chunks, isLastItem) {
3258
+ async processValue(value, path2, level, indentLevel, chunks, isLastItem) {
3259
+ if (level > this.maxDepth) {
3260
+ await this.processValueAsText(value, path2, level, indentLevel, chunks, isLastItem);
3261
+ return;
3262
+ }
3072
3263
  if (Array.isArray(value)) {
3073
- this.processArray(value, path2, level, indentLevel, chunks, isLastItem);
3264
+ await this.processArray(value, path2, level, indentLevel, chunks, isLastItem);
3074
3265
  } else if (value !== null && typeof value === "object") {
3075
- this.processObject(value, path2, level, indentLevel, chunks, isLastItem);
3266
+ await this.processObject(value, path2, level, indentLevel, chunks, isLastItem);
3076
3267
  } else {
3077
- this.processPrimitive(value, path2, level, indentLevel, chunks, isLastItem);
3268
+ await this.processPrimitive(value, path2, level, indentLevel, chunks, isLastItem);
3078
3269
  }
3079
3270
  }
3080
- processArray(array, path2, level, indentLevel, chunks, isLastItem) {
3271
+ async processArray(array, path2, level, indentLevel, chunks, isLastItem) {
3081
3272
  const indent = this.getIndent(indentLevel);
3082
3273
  const comma = isLastItem ? "" : ",";
3083
3274
  chunks.push({
@@ -3085,18 +3276,19 @@ class JsonDocumentSplitter {
3085
3276
  content: `${indent}[`,
3086
3277
  section: { level, path: [...path2] }
3087
3278
  });
3088
- array.forEach((item, index) => {
3279
+ for (let index = 0; index < array.length; index++) {
3280
+ const item = array[index];
3089
3281
  const isLast = index === array.length - 1;
3090
3282
  const itemPath = [...path2, `[${index}]`];
3091
- this.processValue(item, itemPath, level + 1, indentLevel + 1, chunks, isLast);
3092
- });
3283
+ await this.processValue(item, itemPath, level + 1, indentLevel + 1, chunks, isLast);
3284
+ }
3093
3285
  chunks.push({
3094
3286
  types: ["code"],
3095
3287
  content: `${indent}]${comma}`,
3096
3288
  section: { level, path: [...path2] }
3097
3289
  });
3098
3290
  }
3099
- processObject(obj, path2, level, indentLevel, chunks, isLastItem) {
3291
+ async processObject(obj, path2, level, indentLevel, chunks, isLastItem) {
3100
3292
  const indent = this.getIndent(indentLevel);
3101
3293
  const comma = isLastItem ? "" : ",";
3102
3294
  const entries = Object.entries(obj);
@@ -3105,10 +3297,11 @@ class JsonDocumentSplitter {
3105
3297
  content: `${indent}{`,
3106
3298
  section: { level, path: [...path2] }
3107
3299
  });
3108
- entries.forEach(([key, value], index) => {
3300
+ for (let index = 0; index < entries.length; index++) {
3301
+ const [key, value] = entries[index];
3109
3302
  const isLast = index === entries.length - 1;
3110
3303
  const propertyPath = [...path2, key];
3111
- this.processProperty(
3304
+ await this.processProperty(
3112
3305
  key,
3113
3306
  value,
3114
3307
  propertyPath,
@@ -3117,14 +3310,14 @@ class JsonDocumentSplitter {
3117
3310
  chunks,
3118
3311
  isLast
3119
3312
  );
3120
- });
3313
+ }
3121
3314
  chunks.push({
3122
3315
  types: ["code"],
3123
3316
  content: `${indent}}${comma}`,
3124
3317
  section: { level, path: [...path2] }
3125
3318
  });
3126
3319
  }
3127
- processProperty(key, value, path2, level, indentLevel, chunks, isLastProperty) {
3320
+ async processProperty(key, value, path2, level, indentLevel, chunks, isLastProperty) {
3128
3321
  const indent = this.getIndent(indentLevel);
3129
3322
  if (typeof value === "object" && value !== null) {
3130
3323
  chunks.push({
@@ -3132,30 +3325,98 @@ class JsonDocumentSplitter {
3132
3325
  content: `${indent}"${key}": `,
3133
3326
  section: { level, path: path2 }
3134
3327
  });
3135
- this.processValue(value, path2, level, indentLevel, chunks, isLastProperty);
3328
+ await this.processValue(value, path2, level, indentLevel, chunks, isLastProperty);
3136
3329
  } else {
3137
3330
  const comma = isLastProperty ? "" : ",";
3138
3331
  const formattedValue = JSON.stringify(value);
3139
- chunks.push({
3140
- types: ["code"],
3141
- content: `${indent}"${key}": ${formattedValue}${comma}`,
3142
- section: { level, path: path2 }
3143
- });
3332
+ const fullContent = `${indent}"${key}": ${formattedValue}${comma}`;
3333
+ if (fullContent.length > SPLITTER_MAX_CHUNK_SIZE) {
3334
+ const textChunks = await this.textFallbackSplitter.splitText(formattedValue);
3335
+ chunks.push({
3336
+ types: ["code"],
3337
+ content: `${indent}"${key}": `,
3338
+ section: { level, path: path2 }
3339
+ });
3340
+ textChunks.forEach((textChunk, index) => {
3341
+ const isLastChunk = index === textChunks.length - 1;
3342
+ const content = `${textChunk.content}${isLastChunk ? comma : ""}`;
3343
+ chunks.push({
3344
+ types: ["code"],
3345
+ content,
3346
+ section: { level, path: path2 }
3347
+ });
3348
+ });
3349
+ } else {
3350
+ chunks.push({
3351
+ types: ["code"],
3352
+ content: fullContent,
3353
+ section: { level, path: path2 }
3354
+ });
3355
+ }
3144
3356
  }
3145
3357
  }
3146
- processPrimitive(value, path2, level, indentLevel, chunks, isLastItem) {
3358
+ async processPrimitive(value, path2, level, indentLevel, chunks, isLastItem) {
3147
3359
  const indent = this.getIndent(indentLevel);
3148
3360
  const comma = isLastItem ? "" : ",";
3149
3361
  const formattedValue = JSON.stringify(value);
3150
- chunks.push({
3151
- types: ["code"],
3152
- content: `${indent}${formattedValue}${comma}`,
3153
- section: { level, path: path2 }
3154
- });
3362
+ const fullContent = `${indent}${formattedValue}${comma}`;
3363
+ if (fullContent.length > SPLITTER_MAX_CHUNK_SIZE) {
3364
+ const textChunks = await this.textFallbackSplitter.splitText(formattedValue);
3365
+ textChunks.forEach((textChunk, index) => {
3366
+ const isFirstChunk = index === 0;
3367
+ const isLastChunk = index === textChunks.length - 1;
3368
+ const valueContent = isFirstChunk ? `${indent}${textChunk.content}` : textChunk.content;
3369
+ const content = `${valueContent}${isLastChunk ? comma : ""}`;
3370
+ chunks.push({
3371
+ types: ["code"],
3372
+ content,
3373
+ section: { level, path: [...path2] }
3374
+ });
3375
+ });
3376
+ } else {
3377
+ chunks.push({
3378
+ types: ["code"],
3379
+ content: fullContent,
3380
+ section: { level, path: path2 }
3381
+ });
3382
+ }
3155
3383
  }
3156
3384
  getIndent(level) {
3157
3385
  return this.preserveFormatting ? " ".repeat(level) : "";
3158
3386
  }
3387
+ /**
3388
+ * Process a value that has exceeded the maximum depth limit by serializing it as text.
3389
+ * This prevents excessive chunking of deeply nested structures.
3390
+ * If the serialized value is too large, splits it using the text fallback splitter.
3391
+ */
3392
+ async processValueAsText(value, path2, level, indentLevel, chunks, isLastItem) {
3393
+ const indent = this.getIndent(indentLevel);
3394
+ const comma = isLastItem ? "" : ",";
3395
+ let serialized;
3396
+ if (this.preserveFormatting) {
3397
+ const lines = JSON.stringify(value, null, 2).split("\n");
3398
+ serialized = lines.map((line, idx) => idx === 0 ? line : `${indent}${line}`).join("\n");
3399
+ } else {
3400
+ serialized = JSON.stringify(value);
3401
+ }
3402
+ const fullContent = `${indent}${serialized}${comma}`;
3403
+ if (fullContent.length > SPLITTER_MAX_CHUNK_SIZE) {
3404
+ const textChunks = await this.textFallbackSplitter.splitText(serialized);
3405
+ for (const textChunk of textChunks) {
3406
+ chunks.push({
3407
+ types: ["code"],
3408
+ content: textChunk.content,
3409
+ section: { level, path: [...path2] }
3410
+ });
3411
+ }
3412
+ } else {
3413
+ chunks.push({
3414
+ types: ["code"],
3415
+ content: fullContent,
3416
+ section: { level, path: [...path2] }
3417
+ });
3418
+ }
3419
+ }
3159
3420
  }
3160
3421
  class CodeContentSplitter {
3161
3422
  constructor(options) {
@@ -3255,132 +3516,6 @@ class TableContentSplitter {
3255
3516
  return separator.includes("|") && /^\|?[\s-|]+\|?$/.test(separator);
3256
3517
  }
3257
3518
  }
3258
- class TextContentSplitter {
3259
- constructor(options) {
3260
- this.options = options;
3261
- }
3262
- /**
3263
- * Splits text content into chunks while trying to preserve semantic boundaries.
3264
- * Prefers paragraph breaks, then line breaks, finally falling back to word boundaries.
3265
- * Always preserves formatting - trimming should be done by higher-level splitters if needed.
3266
- */
3267
- async split(content) {
3268
- if (content.length <= this.options.chunkSize) {
3269
- return [content];
3270
- }
3271
- const words = content.split(/\s+/);
3272
- const longestWord = words.reduce(
3273
- (max, word) => word.length > max.length ? word : max
3274
- );
3275
- if (longestWord.length > this.options.chunkSize) {
3276
- throw new MinimumChunkSizeError(longestWord.length, this.options.chunkSize);
3277
- }
3278
- const paragraphChunks = this.splitByParagraphs(content);
3279
- if (this.areChunksValid(paragraphChunks)) {
3280
- return paragraphChunks;
3281
- }
3282
- const lineChunks = this.splitByLines(content);
3283
- if (this.areChunksValid(lineChunks)) {
3284
- return this.mergeChunks(lineChunks, "");
3285
- }
3286
- const wordChunks = await this.splitByWords(content);
3287
- return this.mergeChunks(wordChunks, " ");
3288
- }
3289
- /**
3290
- * Checks if all chunks are within the maximum size limit
3291
- */
3292
- areChunksValid(chunks) {
3293
- return chunks.every((chunk) => chunk.length <= this.options.chunkSize);
3294
- }
3295
- /**
3296
- * Splits text into chunks by paragraph boundaries (double newlines)
3297
- * Preserves all formatting and whitespace including the paragraph separators
3298
- */
3299
- splitByParagraphs(text) {
3300
- const chunks = [];
3301
- let startPos = 0;
3302
- const paragraphRegex = /\n\s*\n/g;
3303
- let match = paragraphRegex.exec(text);
3304
- while (match !== null) {
3305
- const endPos = match.index + match[0].length;
3306
- const chunk = text.slice(startPos, endPos);
3307
- if (chunk.length > 2) {
3308
- chunks.push(chunk);
3309
- }
3310
- startPos = endPos;
3311
- match = paragraphRegex.exec(text);
3312
- }
3313
- if (startPos < text.length) {
3314
- const remainingChunk = text.slice(startPos);
3315
- if (remainingChunk.length > 2) {
3316
- chunks.push(remainingChunk);
3317
- }
3318
- }
3319
- return chunks.filter(Boolean);
3320
- }
3321
- /**
3322
- * Splits text into chunks by line boundaries
3323
- * Preserves all formatting and whitespace, including newlines at the end of each line
3324
- */
3325
- splitByLines(text) {
3326
- const chunks = [];
3327
- let startPos = 0;
3328
- for (let i = 0; i < text.length; i++) {
3329
- if (text[i] === "\n") {
3330
- const chunk = text.slice(startPos, i + 1);
3331
- chunks.push(chunk);
3332
- startPos = i + 1;
3333
- }
3334
- }
3335
- if (startPos < text.length) {
3336
- chunks.push(text.slice(startPos));
3337
- }
3338
- return chunks;
3339
- }
3340
- /**
3341
- * Uses LangChain's recursive splitter for word-based splitting as a last resort
3342
- */
3343
- async splitByWords(text) {
3344
- const splitter = new RecursiveCharacterTextSplitter({
3345
- chunkSize: this.options.chunkSize,
3346
- chunkOverlap: 0
3347
- });
3348
- const chunks = await splitter.splitText(text);
3349
- return chunks;
3350
- }
3351
- /**
3352
- * Attempts to merge small chunks with previous chunks to minimize fragmentation.
3353
- * Only merges if combined size is within maxChunkSize.
3354
- */
3355
- mergeChunks(chunks, separator) {
3356
- const mergedChunks = [];
3357
- let currentChunk = null;
3358
- for (const chunk of chunks) {
3359
- if (currentChunk === null) {
3360
- currentChunk = chunk;
3361
- continue;
3362
- }
3363
- const currentChunkSize = this.getChunkSize(currentChunk);
3364
- const nextChunkSize = this.getChunkSize(chunk);
3365
- if (currentChunkSize + nextChunkSize + separator.length <= this.options.chunkSize) {
3366
- currentChunk = `${currentChunk}${separator}${chunk}`;
3367
- } else {
3368
- mergedChunks.push(currentChunk);
3369
- currentChunk = chunk;
3370
- }
3371
- }
3372
- if (currentChunk) {
3373
- mergedChunks.push(currentChunk);
3374
- }
3375
- return mergedChunks;
3376
- }
3377
- getChunkSize(chunk) {
3378
- return chunk.length;
3379
- }
3380
- wrap(content) {
3381
- return content;
3382
- }
3383
- }
3384
3519
  class SemanticMarkdownSplitter {
3385
3520
  constructor(preferredChunkSize, maxChunkSize) {
3386
3521
  this.preferredChunkSize = preferredChunkSize;
@@ -5066,16 +5201,8 @@ class HtmlPlaywrightMiddleware {
5066
5201
  */
5067
5202
  async ensureBrowser() {
5068
5203
  if (!this.browser || !this.browser.isConnected()) {
5069
- const launchArgs = process.env.PLAYWRIGHT_LAUNCH_ARGS?.split(" ") ?? [];
5070
- const executablePath = process.env.PLAYWRIGHT_CHROMIUM_EXECUTABLE_PATH || void 0;
5071
- logger.debug(
5072
- `Launching new Playwright browser instance (Chromium) with args: ${launchArgs.join(" ") || "none"}...`
5073
- );
5074
- this.browser = await chromium.launch({
5075
- channel: "chromium",
5076
- args: launchArgs,
5077
- executablePath
5078
- });
5204
+ logger.debug("Launching new Playwright browser instance (Chromium)");
5205
+ this.browser = await BrowserFetcher.launchBrowser();
5079
5206
  this.browser.on("disconnected", () => {
5080
5207
  logger.debug("Playwright browser instance disconnected.");
5081
5208
  this.browser = null;
@@ -6452,45 +6579,6 @@ class MarkdownPipeline extends BasePipeline {
6452
6579
  };
6453
6580
  }
6454
6581
  }
6455
- class TextDocumentSplitter {
6456
- options;
6457
- textSplitter;
6458
- constructor(options = {}) {
6459
- this.options = {
6460
- maxChunkSize: options.maxChunkSize ?? SPLITTER_MAX_CHUNK_SIZE
6461
- };
6462
- this.textSplitter = new TextContentSplitter({
6463
- chunkSize: this.options.maxChunkSize
6464
- });
6465
- }
6466
- async splitText(content) {
6467
- if (!content.trim()) {
6468
- return [];
6469
- }
6470
- try {
6471
- const chunks = await this.textSplitter.split(content);
6472
- return chunks.map((chunk) => ({
6473
- types: ["text"],
6474
- content: chunk,
6475
- section: {
6476
- level: 0,
6477
- path: []
6478
- }
6479
- }));
6480
- } catch {
6481
- return [
6482
- {
6483
- types: ["text"],
6484
- content,
6485
- section: {
6486
- level: 0,
6487
- path: []
6488
- }
6489
- }
6490
- ];
6491
- }
6492
- }
6493
- }
6494
6582
  class TextPipeline extends BasePipeline {
6495
6583
  middleware;
6496
6584
  splitter;
@@ -8973,9 +9061,10 @@ class DocumentStore {
8973
9061
  * - Single texts that are too large are truncated and retried once
8974
9062
  *
8975
9063
  * @param texts Array of texts to embed
9064
+ * @param isRetry Internal flag to prevent duplicate warning logs
8976
9065
  * @returns Array of embedding vectors
8977
9066
  */
8978
- async embedDocumentsWithRetry(texts) {
9067
+ async embedDocumentsWithRetry(texts, isRetry = false) {
8979
9068
  if (texts.length === 0) {
8980
9069
  return [];
8981
9070
  }
@@ -8987,26 +9076,27 @@ class DocumentStore {
8987
9076
  const midpoint = Math.floor(texts.length / 2);
8988
9077
  const firstHalf = texts.slice(0, midpoint);
8989
9078
  const secondHalf = texts.slice(midpoint);
8990
- logger.warn(
8991
- `⚠️ Batch of ${texts.length} texts exceeded size limit, splitting into ${firstHalf.length} + ${secondHalf.length}`
8992
- );
9079
+ if (!isRetry) {
9080
+ logger.warn(
9081
+ `⚠️ Batch of ${texts.length} texts exceeded size limit, splitting into ${firstHalf.length} + ${secondHalf.length}`
9082
+ );
9083
+ }
8993
9084
  const [firstEmbeddings, secondEmbeddings] = await Promise.all([
8994
- this.embedDocumentsWithRetry(firstHalf),
8995
- this.embedDocumentsWithRetry(secondHalf)
9085
+ this.embedDocumentsWithRetry(firstHalf, true),
9086
+ this.embedDocumentsWithRetry(secondHalf, true)
8996
9087
  ]);
8997
9088
  return [...firstEmbeddings, ...secondEmbeddings];
8998
9089
  } else {
8999
9090
  const text = texts[0];
9000
9091
  const midpoint = Math.floor(text.length / 2);
9001
9092
  const firstHalf = text.substring(0, midpoint);
9002
- logger.warn(
9003
- `⚠️ Single text exceeded embedding size limit (${text.length} chars). Truncating at ${firstHalf.length} chars.`
9004
- );
9005
- try {
9006
- const embedding = await this.embedDocumentsWithRetry([firstHalf]);
9007
- logger.info(
9008
- `✓ Using embedding from first half of split text (${firstHalf.length} chars)`
9093
+ if (!isRetry) {
9094
+ logger.warn(
9095
+ `⚠️ Single text exceeded embedding size limit (${text.length} chars).`
9009
9096
  );
9097
+ }
9098
+ try {
9099
+ const embedding = await this.embedDocumentsWithRetry([firstHalf], true);
9010
9100
  return embedding;
9011
9101
  } catch (retryError) {
9012
9102
  logger.error(
@@ -9130,8 +9220,8 @@ class DocumentStore {
9130
9220
  const rowId = result2.lastInsertRowid;
9131
9221
  if (this.isVectorSearchEnabled && paddedEmbeddings.length > 0) {
9132
9222
  this.statements.insertEmbedding.run(
9133
- BigInt(rowId),
9134
- JSON.stringify(paddedEmbeddings[docIndex])
9223
+ JSON.stringify(paddedEmbeddings[docIndex]),
9224
+ BigInt(rowId)
9135
9225
  );
9136
9226
  }
9137
9227
  docIndex++;
@@ -10770,7 +10860,7 @@ const Layout = ({
10770
10860
  children,
10771
10861
  eventClientConfig
10772
10862
  }) => {
10773
- const versionString = version || "1.32.0";
10863
+ const versionString = version || "1.33.1";
10774
10864
  const versionInitializer = `versionUpdate({ currentVersion: ${`'${versionString}'`} })`;
10775
10865
  return /* @__PURE__ */ jsxs("html", { lang: "en", children: [
10776
10866
  /* @__PURE__ */ jsxs("head", { children: [
@@ -13082,7 +13172,7 @@ class AppServer {
13082
13172
  try {
13083
13173
  if (telemetry.isEnabled()) {
13084
13174
  telemetry.setGlobalContext({
13085
- appVersion: "1.32.0",
13175
+ appVersion: "1.33.1",
13086
13176
  appPlatform: process.platform,
13087
13177
  appNodeVersion: process.version,
13088
13178
  appServicesEnabled: this.getActiveServicesList(),
@@ -16569,7 +16659,7 @@ function createCliProgram() {
16569
16659
  const commandStartTimes = /* @__PURE__ */ new Map();
16570
16660
  let globalEventBus = null;
16571
16661
  let globalTelemetryService = null;
16572
- program.name("docs-mcp-server").description("Unified CLI, MCP Server, and Web Interface for Docs MCP Server.").version("1.32.0").addOption(
16662
+ program.name("docs-mcp-server").description("Unified CLI, MCP Server, and Web Interface for Docs MCP Server.").version("1.33.1").addOption(
16573
16663
  new Option("--verbose", "Enable verbose (debug) logging").conflicts("silent")
16574
16664
  ).addOption(new Option("--silent", "Disable all logging except errors")).addOption(
16575
16665
  new Option("--telemetry", "Enable telemetry collection").env("DOCS_MCP_TELEMETRY").argParser((value) => {
@@ -16603,7 +16693,7 @@ function createCliProgram() {
16603
16693
  if (shouldEnableTelemetry()) {
16604
16694
  if (telemetry.isEnabled()) {
16605
16695
  telemetry.setGlobalContext({
16606
- appVersion: "1.32.0",
16696
+ appVersion: "1.33.1",
16607
16697
  appPlatform: process.platform,
16608
16698
  appNodeVersion: process.version,
16609
16699
  appInterface: "cli",