@cosmocoder/mcp-web-docs 1.2.0 → 1.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +158 -0
- package/build/index.js +490 -8
- package/build/index.js.map +1 -1
- package/build/storage/storage.d.ts +64 -1
- package/build/storage/storage.js +373 -4
- package/build/storage/storage.js.map +1 -1
- package/build/storage/storage.test.js +361 -0
- package/build/storage/storage.test.js.map +1 -1
- package/build/types.d.ts +26 -0
- package/build/util/security.d.ts +56 -0
- package/build/util/security.js +63 -0
- package/build/util/security.js.map +1 -1
- package/package.json +12 -12
package/build/index.js
CHANGED
|
@@ -23,7 +23,7 @@ import { AuthManager } from './crawler/auth.js';
|
|
|
23
23
|
import { fetchFavicon } from './util/favicon.js';
|
|
24
24
|
import { generateDocId } from './util/docs.js';
|
|
25
25
|
import { logger } from './util/logger.js';
|
|
26
|
-
import { StorageStateSchema, safeJsonParse, validateToolArgs, sanitizeErrorMessage, detectPromptInjection, wrapExternalContent, addInjectionWarnings, SessionExpiredError, AddDocumentationArgsSchema, AuthenticateArgsSchema, ClearAuthArgsSchema, SearchDocumentationArgsSchema, ReindexDocumentationArgsSchema, DeleteDocumentationArgsSchema, SetTagsArgsSchema, } from './util/security.js';
|
|
26
|
+
import { StorageStateSchema, safeJsonParse, validateToolArgs, sanitizeErrorMessage, detectPromptInjection, wrapExternalContent, addInjectionWarnings, SessionExpiredError, AddDocumentationArgsSchema, AuthenticateArgsSchema, ClearAuthArgsSchema, SearchDocumentationArgsSchema, ReindexDocumentationArgsSchema, DeleteDocumentationArgsSchema, SetTagsArgsSchema, CreateCollectionArgsSchema, DeleteCollectionArgsSchema, UpdateCollectionArgsSchema, GetCollectionArgsSchema, AddToCollectionArgsSchema, RemoveFromCollectionArgsSchema, SearchCollectionArgsSchema, } from './util/security.js';
|
|
27
27
|
class WebDocsServer {
|
|
28
28
|
server;
|
|
29
29
|
config;
|
|
@@ -128,7 +128,17 @@ class WebDocsServer {
|
|
|
128
128
|
name: 'add_documentation',
|
|
129
129
|
description: `Add new documentation site for indexing. Supports authenticated sites via the auth options.
|
|
130
130
|
|
|
131
|
-
IMPORTANT: Before calling this tool, ask the user if they want to restrict crawling to a specific path prefix. For example, if indexing https://docs.example.com/api/v2/overview, the user might want to restrict to '/api/v2' to avoid crawling unrelated sections of the site
|
|
131
|
+
IMPORTANT: Before calling this tool, ask the user if they want to restrict crawling to a specific path prefix. For example, if indexing https://docs.example.com/api/v2/overview, the user might want to restrict to '/api/v2' to avoid crawling unrelated sections of the site.
|
|
132
|
+
|
|
133
|
+
VERSIONING: If the user is indexing documentation for a versioned software package/library (e.g., React, Vue, Python, a database, an SDK), ask what version they want to associate with this documentation. Many packages have multiple versions with different APIs.
|
|
134
|
+
|
|
135
|
+
Do NOT ask about versioning for:
|
|
136
|
+
- Internal company documentation (wikis, best practices, runbooks)
|
|
137
|
+
- Single-version products or services
|
|
138
|
+
- Documentation the user indicates should always reflect "latest"
|
|
139
|
+
|
|
140
|
+
Examples where version matters: "React 18", "Python 3.11", "PostgreSQL 15", "Next.js 14"
|
|
141
|
+
Examples where version doesn't matter: "Company engineering handbook", "AWS console docs", "Confluence spaces"`,
|
|
132
142
|
inputSchema: {
|
|
133
143
|
type: 'object',
|
|
134
144
|
properties: {
|
|
@@ -153,6 +163,10 @@ IMPORTANT: Before calling this tool, ask the user if they want to restrict crawl
|
|
|
153
163
|
items: { type: 'string' },
|
|
154
164
|
description: 'Optional tags to categorize the documentation (e.g., ["frontend", "mycompany"]). Tags help filter search results across multiple documentation sites.',
|
|
155
165
|
},
|
|
166
|
+
version: {
|
|
167
|
+
type: 'string',
|
|
168
|
+
description: 'Optional version identifier for versioned package documentation (e.g., "18", "v6.4", "3.11", "latest"). Helps distinguish between multiple versions of the same package.',
|
|
169
|
+
},
|
|
156
170
|
auth: {
|
|
157
171
|
type: 'object',
|
|
158
172
|
description: 'Authentication options for protected documentation sites',
|
|
@@ -361,6 +375,143 @@ IMPORTANT: Before calling this tool, ask the user if they want to restrict crawl
|
|
|
361
375
|
properties: {},
|
|
362
376
|
},
|
|
363
377
|
},
|
|
378
|
+
// ============ Collection Tools ============
|
|
379
|
+
{
|
|
380
|
+
name: 'create_collection',
|
|
381
|
+
description: 'Create a new collection to group related documentation sites. Collections help organize docs by project or context (e.g., "My React Project" with React + Next.js + TypeScript docs).',
|
|
382
|
+
inputSchema: {
|
|
383
|
+
type: 'object',
|
|
384
|
+
properties: {
|
|
385
|
+
name: {
|
|
386
|
+
type: 'string',
|
|
387
|
+
description: 'Unique name for the collection (e.g., "My React Project", "Backend APIs")',
|
|
388
|
+
},
|
|
389
|
+
description: {
|
|
390
|
+
type: 'string',
|
|
391
|
+
description: 'Optional description of what this collection contains',
|
|
392
|
+
},
|
|
393
|
+
},
|
|
394
|
+
required: ['name'],
|
|
395
|
+
},
|
|
396
|
+
},
|
|
397
|
+
{
|
|
398
|
+
name: 'delete_collection',
|
|
399
|
+
description: 'Delete a collection. The documentation sites in the collection are NOT deleted, only the collection grouping.',
|
|
400
|
+
inputSchema: {
|
|
401
|
+
type: 'object',
|
|
402
|
+
properties: {
|
|
403
|
+
name: {
|
|
404
|
+
type: 'string',
|
|
405
|
+
description: 'Name of the collection to delete',
|
|
406
|
+
},
|
|
407
|
+
},
|
|
408
|
+
required: ['name'],
|
|
409
|
+
},
|
|
410
|
+
},
|
|
411
|
+
{
|
|
412
|
+
name: 'update_collection',
|
|
413
|
+
description: "Update a collection's name or description.",
|
|
414
|
+
inputSchema: {
|
|
415
|
+
type: 'object',
|
|
416
|
+
properties: {
|
|
417
|
+
name: {
|
|
418
|
+
type: 'string',
|
|
419
|
+
description: 'Current name of the collection',
|
|
420
|
+
},
|
|
421
|
+
newName: {
|
|
422
|
+
type: 'string',
|
|
423
|
+
description: 'Optional new name for the collection',
|
|
424
|
+
},
|
|
425
|
+
description: {
|
|
426
|
+
type: 'string',
|
|
427
|
+
description: 'Optional new description for the collection',
|
|
428
|
+
},
|
|
429
|
+
},
|
|
430
|
+
required: ['name'],
|
|
431
|
+
},
|
|
432
|
+
},
|
|
433
|
+
{
|
|
434
|
+
name: 'list_collections',
|
|
435
|
+
description: 'List all collections with their document counts. Use this to see available collections for context switching.',
|
|
436
|
+
inputSchema: {
|
|
437
|
+
type: 'object',
|
|
438
|
+
properties: {},
|
|
439
|
+
},
|
|
440
|
+
},
|
|
441
|
+
{
|
|
442
|
+
name: 'get_collection',
|
|
443
|
+
description: 'Get details of a specific collection including all its documentation sites.',
|
|
444
|
+
inputSchema: {
|
|
445
|
+
type: 'object',
|
|
446
|
+
properties: {
|
|
447
|
+
name: {
|
|
448
|
+
type: 'string',
|
|
449
|
+
description: 'Name of the collection',
|
|
450
|
+
},
|
|
451
|
+
},
|
|
452
|
+
required: ['name'],
|
|
453
|
+
},
|
|
454
|
+
},
|
|
455
|
+
{
|
|
456
|
+
name: 'add_to_collection',
|
|
457
|
+
description: 'Add one or more documentation sites to a collection. Sites must already be indexed.',
|
|
458
|
+
inputSchema: {
|
|
459
|
+
type: 'object',
|
|
460
|
+
properties: {
|
|
461
|
+
name: {
|
|
462
|
+
type: 'string',
|
|
463
|
+
description: 'Name of the collection',
|
|
464
|
+
},
|
|
465
|
+
urls: {
|
|
466
|
+
type: 'array',
|
|
467
|
+
items: { type: 'string' },
|
|
468
|
+
description: 'URLs of indexed documentation sites to add (max 50)',
|
|
469
|
+
},
|
|
470
|
+
},
|
|
471
|
+
required: ['name', 'urls'],
|
|
472
|
+
},
|
|
473
|
+
},
|
|
474
|
+
{
|
|
475
|
+
name: 'remove_from_collection',
|
|
476
|
+
description: 'Remove one or more documentation sites from a collection. The sites remain indexed, just removed from the collection.',
|
|
477
|
+
inputSchema: {
|
|
478
|
+
type: 'object',
|
|
479
|
+
properties: {
|
|
480
|
+
name: {
|
|
481
|
+
type: 'string',
|
|
482
|
+
description: 'Name of the collection',
|
|
483
|
+
},
|
|
484
|
+
urls: {
|
|
485
|
+
type: 'array',
|
|
486
|
+
items: { type: 'string' },
|
|
487
|
+
description: 'URLs of documentation sites to remove from the collection',
|
|
488
|
+
},
|
|
489
|
+
},
|
|
490
|
+
required: ['name', 'urls'],
|
|
491
|
+
},
|
|
492
|
+
},
|
|
493
|
+
{
|
|
494
|
+
name: 'search_collection',
|
|
495
|
+
description: 'Search for documentation within a specific collection. This is useful for focused searches within a project context. Uses the same hybrid search (full-text + semantic) as search_documentation.',
|
|
496
|
+
inputSchema: {
|
|
497
|
+
type: 'object',
|
|
498
|
+
properties: {
|
|
499
|
+
name: {
|
|
500
|
+
type: 'string',
|
|
501
|
+
description: 'Name of the collection to search in',
|
|
502
|
+
},
|
|
503
|
+
query: {
|
|
504
|
+
type: 'string',
|
|
505
|
+
description: 'Search query - be specific and include unique terms',
|
|
506
|
+
},
|
|
507
|
+
limit: {
|
|
508
|
+
type: 'number',
|
|
509
|
+
description: 'Maximum number of results (default: 10)',
|
|
510
|
+
},
|
|
511
|
+
},
|
|
512
|
+
required: ['name', 'query'],
|
|
513
|
+
},
|
|
514
|
+
},
|
|
364
515
|
],
|
|
365
516
|
}));
|
|
366
517
|
// Handle tool calls
|
|
@@ -390,6 +541,23 @@ IMPORTANT: Before calling this tool, ask the user if they want to restrict crawl
|
|
|
390
541
|
return this.handleSetTags(request.params.arguments);
|
|
391
542
|
case 'list_tags':
|
|
392
543
|
return this.handleListTags();
|
|
544
|
+
// Collection handlers
|
|
545
|
+
case 'create_collection':
|
|
546
|
+
return this.handleCreateCollection(request.params.arguments);
|
|
547
|
+
case 'delete_collection':
|
|
548
|
+
return this.handleDeleteCollection(request.params.arguments);
|
|
549
|
+
case 'update_collection':
|
|
550
|
+
return this.handleUpdateCollection(request.params.arguments);
|
|
551
|
+
case 'list_collections':
|
|
552
|
+
return this.handleListCollections();
|
|
553
|
+
case 'get_collection':
|
|
554
|
+
return this.handleGetCollection(request.params.arguments);
|
|
555
|
+
case 'add_to_collection':
|
|
556
|
+
return this.handleAddToCollection(request.params.arguments);
|
|
557
|
+
case 'remove_from_collection':
|
|
558
|
+
return this.handleRemoveFromCollection(request.params.arguments);
|
|
559
|
+
case 'search_collection':
|
|
560
|
+
return this.handleSearchCollection(request.params.arguments);
|
|
393
561
|
default:
|
|
394
562
|
throw new McpError(ErrorCode.MethodNotFound, `Unknown tool: ${request.params.name}`);
|
|
395
563
|
}
|
|
@@ -404,7 +572,7 @@ IMPORTANT: Before calling this tool, ask the user if they want to restrict crawl
|
|
|
404
572
|
catch (error) {
|
|
405
573
|
throw new McpError(ErrorCode.InvalidParams, sanitizeErrorMessage(error));
|
|
406
574
|
}
|
|
407
|
-
const { url, title, id, pathPrefix, tags, auth: authOptions } = validatedArgs;
|
|
575
|
+
const { url, title, id, pathPrefix, tags, version, auth: authOptions } = validatedArgs;
|
|
408
576
|
// Additional SSRF protection check
|
|
409
577
|
if (!isValidPublicUrl(url)) {
|
|
410
578
|
throw new McpError(ErrorCode.InvalidParams, 'Access to private networks is blocked');
|
|
@@ -472,7 +640,7 @@ IMPORTANT: Before calling this tool, ask the user if they want to restrict crawl
|
|
|
472
640
|
// Start indexing process
|
|
473
641
|
this.statusTracker.startIndexing(docId, normalizedUrl, docTitle);
|
|
474
642
|
// Start indexing in the background with abort support
|
|
475
|
-
const operationPromise = this.indexAndAdd(docId, normalizedUrl, docTitle, false, controller.signal, pathPrefix, authInfo, tags)
|
|
643
|
+
const operationPromise = this.indexAndAdd(docId, normalizedUrl, docTitle, false, controller.signal, pathPrefix, authInfo, tags, version)
|
|
476
644
|
.catch((error) => {
|
|
477
645
|
const err = error;
|
|
478
646
|
if (err?.name !== 'AbortError') {
|
|
@@ -615,8 +783,9 @@ IMPORTANT: Before calling this tool, ask the user if they want to restrict crawl
|
|
|
615
783
|
authDomain: doc.authDomain || new URL(normalizedUrl).hostname,
|
|
616
784
|
}
|
|
617
785
|
: undefined;
|
|
618
|
-
// Preserve existing tags during reindex
|
|
786
|
+
// Preserve existing tags and version during reindex
|
|
619
787
|
const existingTags = doc.tags;
|
|
788
|
+
const existingVersion = doc.version;
|
|
620
789
|
// Cancel any existing operation for this URL
|
|
621
790
|
const wasCancelled = this.indexingQueue.isIndexing(normalizedUrl);
|
|
622
791
|
const controller = await this.indexingQueue.startOperation(normalizedUrl);
|
|
@@ -627,8 +796,8 @@ IMPORTANT: Before calling this tool, ask the user if they want to restrict crawl
|
|
|
627
796
|
logger.info(`[Progress] Registered token for ${docId}: ${progressToken}`);
|
|
628
797
|
}
|
|
629
798
|
this.statusTracker.startIndexing(docId, normalizedUrl, doc.title);
|
|
630
|
-
// Start reindexing in the background with abort support (preserving existing tags)
|
|
631
|
-
const operationPromise = this.indexAndAdd(docId, normalizedUrl, doc.title, true, controller.signal, undefined, authInfo, existingTags)
|
|
799
|
+
// Start reindexing in the background with abort support (preserving existing tags and version)
|
|
800
|
+
const operationPromise = this.indexAndAdd(docId, normalizedUrl, doc.title, true, controller.signal, undefined, authInfo, existingTags, existingVersion)
|
|
632
801
|
.catch((error) => {
|
|
633
802
|
const err = error;
|
|
634
803
|
if (err?.name !== 'AbortError') {
|
|
@@ -941,7 +1110,319 @@ IMPORTANT: Before calling this tool, ask the user if they want to restrict crawl
|
|
|
941
1110
|
],
|
|
942
1111
|
};
|
|
943
1112
|
}
|
|
944
|
-
|
|
1113
|
+
// ============ Collection Handlers ============
|
|
1114
|
+
/**
|
|
1115
|
+
* Handle creating a new collection
|
|
1116
|
+
*/
|
|
1117
|
+
async handleCreateCollection(args) {
|
|
1118
|
+
let validatedArgs;
|
|
1119
|
+
try {
|
|
1120
|
+
validatedArgs = validateToolArgs(args, CreateCollectionArgsSchema);
|
|
1121
|
+
}
|
|
1122
|
+
catch (error) {
|
|
1123
|
+
throw new McpError(ErrorCode.InvalidParams, sanitizeErrorMessage(error));
|
|
1124
|
+
}
|
|
1125
|
+
const { name, description } = validatedArgs;
|
|
1126
|
+
try {
|
|
1127
|
+
await this.store.createCollection(name, description);
|
|
1128
|
+
return {
|
|
1129
|
+
content: [
|
|
1130
|
+
{
|
|
1131
|
+
type: 'text',
|
|
1132
|
+
text: JSON.stringify({
|
|
1133
|
+
status: 'success',
|
|
1134
|
+
message: `Collection "${name}" created successfully`,
|
|
1135
|
+
collection: {
|
|
1136
|
+
name,
|
|
1137
|
+
description,
|
|
1138
|
+
},
|
|
1139
|
+
}, null, 2),
|
|
1140
|
+
},
|
|
1141
|
+
],
|
|
1142
|
+
};
|
|
1143
|
+
}
|
|
1144
|
+
catch (error) {
|
|
1145
|
+
const safeMessage = sanitizeErrorMessage(error);
|
|
1146
|
+
if (safeMessage.includes('already exists')) {
|
|
1147
|
+
throw new McpError(ErrorCode.InvalidParams, safeMessage);
|
|
1148
|
+
}
|
|
1149
|
+
throw new McpError(ErrorCode.InternalError, `Failed to create collection: ${safeMessage}`);
|
|
1150
|
+
}
|
|
1151
|
+
}
|
|
1152
|
+
/**
|
|
1153
|
+
* Handle deleting a collection
|
|
1154
|
+
*/
|
|
1155
|
+
async handleDeleteCollection(args) {
|
|
1156
|
+
let validatedArgs;
|
|
1157
|
+
try {
|
|
1158
|
+
validatedArgs = validateToolArgs(args, DeleteCollectionArgsSchema);
|
|
1159
|
+
}
|
|
1160
|
+
catch (error) {
|
|
1161
|
+
throw new McpError(ErrorCode.InvalidParams, sanitizeErrorMessage(error));
|
|
1162
|
+
}
|
|
1163
|
+
const { name } = validatedArgs;
|
|
1164
|
+
try {
|
|
1165
|
+
await this.store.deleteCollection(name);
|
|
1166
|
+
return {
|
|
1167
|
+
content: [
|
|
1168
|
+
{
|
|
1169
|
+
type: 'text',
|
|
1170
|
+
text: JSON.stringify({
|
|
1171
|
+
status: 'success',
|
|
1172
|
+
message: `Collection "${name}" deleted. Documentation sites remain indexed.`,
|
|
1173
|
+
}, null, 2),
|
|
1174
|
+
},
|
|
1175
|
+
],
|
|
1176
|
+
};
|
|
1177
|
+
}
|
|
1178
|
+
catch (error) {
|
|
1179
|
+
const safeMessage = sanitizeErrorMessage(error);
|
|
1180
|
+
if (safeMessage.includes('not found')) {
|
|
1181
|
+
throw new McpError(ErrorCode.InvalidParams, safeMessage);
|
|
1182
|
+
}
|
|
1183
|
+
throw new McpError(ErrorCode.InternalError, `Failed to delete collection: ${safeMessage}`);
|
|
1184
|
+
}
|
|
1185
|
+
}
|
|
1186
|
+
/**
|
|
1187
|
+
* Handle updating a collection's metadata
|
|
1188
|
+
*/
|
|
1189
|
+
async handleUpdateCollection(args) {
|
|
1190
|
+
let validatedArgs;
|
|
1191
|
+
try {
|
|
1192
|
+
validatedArgs = validateToolArgs(args, UpdateCollectionArgsSchema);
|
|
1193
|
+
}
|
|
1194
|
+
catch (error) {
|
|
1195
|
+
throw new McpError(ErrorCode.InvalidParams, sanitizeErrorMessage(error));
|
|
1196
|
+
}
|
|
1197
|
+
const { name, newName, description } = validatedArgs;
|
|
1198
|
+
// Must provide at least one field to update
|
|
1199
|
+
if (newName === undefined && description === undefined) {
|
|
1200
|
+
throw new McpError(ErrorCode.InvalidParams, 'Must provide newName or description to update');
|
|
1201
|
+
}
|
|
1202
|
+
try {
|
|
1203
|
+
await this.store.updateCollection(name, { newName, description });
|
|
1204
|
+
return {
|
|
1205
|
+
content: [
|
|
1206
|
+
{
|
|
1207
|
+
type: 'text',
|
|
1208
|
+
text: JSON.stringify({
|
|
1209
|
+
status: 'success',
|
|
1210
|
+
message: `Collection updated successfully`,
|
|
1211
|
+
collection: {
|
|
1212
|
+
name: newName ?? name,
|
|
1213
|
+
description,
|
|
1214
|
+
},
|
|
1215
|
+
}, null, 2),
|
|
1216
|
+
},
|
|
1217
|
+
],
|
|
1218
|
+
};
|
|
1219
|
+
}
|
|
1220
|
+
catch (error) {
|
|
1221
|
+
const safeMessage = sanitizeErrorMessage(error);
|
|
1222
|
+
if (safeMessage.includes('not found') || safeMessage.includes('already exists')) {
|
|
1223
|
+
throw new McpError(ErrorCode.InvalidParams, safeMessage);
|
|
1224
|
+
}
|
|
1225
|
+
throw new McpError(ErrorCode.InternalError, `Failed to update collection: ${safeMessage}`);
|
|
1226
|
+
}
|
|
1227
|
+
}
|
|
1228
|
+
/**
|
|
1229
|
+
* Handle listing all collections
|
|
1230
|
+
*/
|
|
1231
|
+
async handleListCollections() {
|
|
1232
|
+
const collections = await this.store.listCollections();
|
|
1233
|
+
return {
|
|
1234
|
+
content: [
|
|
1235
|
+
{
|
|
1236
|
+
type: 'text',
|
|
1237
|
+
text: JSON.stringify({
|
|
1238
|
+
collections,
|
|
1239
|
+
total: collections.length,
|
|
1240
|
+
}, null, 2),
|
|
1241
|
+
},
|
|
1242
|
+
],
|
|
1243
|
+
};
|
|
1244
|
+
}
|
|
1245
|
+
/**
|
|
1246
|
+
* Handle getting a specific collection with its documents
|
|
1247
|
+
*/
|
|
1248
|
+
async handleGetCollection(args) {
|
|
1249
|
+
let validatedArgs;
|
|
1250
|
+
try {
|
|
1251
|
+
validatedArgs = validateToolArgs(args, GetCollectionArgsSchema);
|
|
1252
|
+
}
|
|
1253
|
+
catch (error) {
|
|
1254
|
+
throw new McpError(ErrorCode.InvalidParams, sanitizeErrorMessage(error));
|
|
1255
|
+
}
|
|
1256
|
+
const { name } = validatedArgs;
|
|
1257
|
+
const collection = await this.store.getCollection(name);
|
|
1258
|
+
if (!collection) {
|
|
1259
|
+
throw new McpError(ErrorCode.InvalidParams, `Collection "${name}" not found`);
|
|
1260
|
+
}
|
|
1261
|
+
return {
|
|
1262
|
+
content: [
|
|
1263
|
+
{
|
|
1264
|
+
type: 'text',
|
|
1265
|
+
text: JSON.stringify(collection, null, 2),
|
|
1266
|
+
},
|
|
1267
|
+
],
|
|
1268
|
+
};
|
|
1269
|
+
}
|
|
1270
|
+
/**
|
|
1271
|
+
* Handle adding documents to a collection
|
|
1272
|
+
*/
|
|
1273
|
+
async handleAddToCollection(args) {
|
|
1274
|
+
let validatedArgs;
|
|
1275
|
+
try {
|
|
1276
|
+
validatedArgs = validateToolArgs(args, AddToCollectionArgsSchema);
|
|
1277
|
+
}
|
|
1278
|
+
catch (error) {
|
|
1279
|
+
throw new McpError(ErrorCode.InvalidParams, sanitizeErrorMessage(error));
|
|
1280
|
+
}
|
|
1281
|
+
const { name, urls } = validatedArgs;
|
|
1282
|
+
// Normalize URLs
|
|
1283
|
+
const normalizedUrls = urls.map((url) => normalizeUrl(url));
|
|
1284
|
+
try {
|
|
1285
|
+
const result = await this.store.addToCollection(name, normalizedUrls);
|
|
1286
|
+
return {
|
|
1287
|
+
content: [
|
|
1288
|
+
{
|
|
1289
|
+
type: 'text',
|
|
1290
|
+
text: JSON.stringify({
|
|
1291
|
+
status: 'success',
|
|
1292
|
+
message: `Added ${result.added.length} document(s) to collection "${name}"`,
|
|
1293
|
+
...result,
|
|
1294
|
+
}, null, 2),
|
|
1295
|
+
},
|
|
1296
|
+
],
|
|
1297
|
+
};
|
|
1298
|
+
}
|
|
1299
|
+
catch (error) {
|
|
1300
|
+
const safeMessage = sanitizeErrorMessage(error);
|
|
1301
|
+
if (safeMessage.includes('not found')) {
|
|
1302
|
+
throw new McpError(ErrorCode.InvalidParams, safeMessage);
|
|
1303
|
+
}
|
|
1304
|
+
throw new McpError(ErrorCode.InternalError, `Failed to add to collection: ${safeMessage}`);
|
|
1305
|
+
}
|
|
1306
|
+
}
|
|
1307
|
+
/**
|
|
1308
|
+
* Handle removing documents from a collection
|
|
1309
|
+
*/
|
|
1310
|
+
async handleRemoveFromCollection(args) {
|
|
1311
|
+
let validatedArgs;
|
|
1312
|
+
try {
|
|
1313
|
+
validatedArgs = validateToolArgs(args, RemoveFromCollectionArgsSchema);
|
|
1314
|
+
}
|
|
1315
|
+
catch (error) {
|
|
1316
|
+
throw new McpError(ErrorCode.InvalidParams, sanitizeErrorMessage(error));
|
|
1317
|
+
}
|
|
1318
|
+
const { name, urls } = validatedArgs;
|
|
1319
|
+
// Normalize URLs
|
|
1320
|
+
const normalizedUrls = urls.map((url) => normalizeUrl(url));
|
|
1321
|
+
try {
|
|
1322
|
+
const result = await this.store.removeFromCollection(name, normalizedUrls);
|
|
1323
|
+
return {
|
|
1324
|
+
content: [
|
|
1325
|
+
{
|
|
1326
|
+
type: 'text',
|
|
1327
|
+
text: JSON.stringify({
|
|
1328
|
+
status: 'success',
|
|
1329
|
+
message: `Removed ${result.removed.length} document(s) from collection "${name}"`,
|
|
1330
|
+
...result,
|
|
1331
|
+
}, null, 2),
|
|
1332
|
+
},
|
|
1333
|
+
],
|
|
1334
|
+
};
|
|
1335
|
+
}
|
|
1336
|
+
catch (error) {
|
|
1337
|
+
const safeMessage = sanitizeErrorMessage(error);
|
|
1338
|
+
if (safeMessage.includes('not found')) {
|
|
1339
|
+
throw new McpError(ErrorCode.InvalidParams, safeMessage);
|
|
1340
|
+
}
|
|
1341
|
+
throw new McpError(ErrorCode.InternalError, `Failed to remove from collection: ${safeMessage}`);
|
|
1342
|
+
}
|
|
1343
|
+
}
|
|
1344
|
+
/**
|
|
1345
|
+
* Handle searching within a collection
|
|
1346
|
+
*/
|
|
1347
|
+
async handleSearchCollection(args) {
|
|
1348
|
+
let validatedArgs;
|
|
1349
|
+
try {
|
|
1350
|
+
validatedArgs = validateToolArgs(args, SearchCollectionArgsSchema);
|
|
1351
|
+
}
|
|
1352
|
+
catch (error) {
|
|
1353
|
+
throw new McpError(ErrorCode.InvalidParams, sanitizeErrorMessage(error));
|
|
1354
|
+
}
|
|
1355
|
+
const { name, query, limit = 10 } = validatedArgs;
|
|
1356
|
+
// Get URLs in the collection
|
|
1357
|
+
const collectionUrls = await this.store.getCollectionUrls(name);
|
|
1358
|
+
if (collectionUrls.length === 0) {
|
|
1359
|
+
// Check if collection exists but is empty
|
|
1360
|
+
const collection = await this.store.getCollection(name);
|
|
1361
|
+
if (!collection) {
|
|
1362
|
+
throw new McpError(ErrorCode.InvalidParams, `Collection "${name}" not found`);
|
|
1363
|
+
}
|
|
1364
|
+
return {
|
|
1365
|
+
content: [
|
|
1366
|
+
{
|
|
1367
|
+
type: 'text',
|
|
1368
|
+
text: JSON.stringify({
|
|
1369
|
+
results: [],
|
|
1370
|
+
message: `Collection "${name}" is empty. Add documentation sites to search.`,
|
|
1371
|
+
}, null, 2),
|
|
1372
|
+
},
|
|
1373
|
+
],
|
|
1374
|
+
};
|
|
1375
|
+
}
|
|
1376
|
+
// Search using the existing searchByText but filter by collection URLs
|
|
1377
|
+
// We'll use a custom approach: search all then filter
|
|
1378
|
+
// This is less efficient than a native filter, but works with existing infrastructure
|
|
1379
|
+
const allResults = await this.store.searchByText(query, { limit: limit * 3 }); // Get more results to filter
|
|
1380
|
+
// Filter to only include results from collection URLs
|
|
1381
|
+
const collectionUrlSet = new Set(collectionUrls);
|
|
1382
|
+
let filteredResults = allResults.filter((result) => collectionUrlSet.has(result.url));
|
|
1383
|
+
// Apply limit
|
|
1384
|
+
filteredResults = filteredResults.slice(0, limit);
|
|
1385
|
+
// Apply prompt injection detection and filter/process results (same as handleSearchDocumentation)
|
|
1386
|
+
let blockedCount = 0;
|
|
1387
|
+
const safeResults = filteredResults
|
|
1388
|
+
.map((result) => {
|
|
1389
|
+
const injectionResult = detectPromptInjection(result.content);
|
|
1390
|
+
if (injectionResult.maxSeverity === 'high') {
|
|
1391
|
+
blockedCount++;
|
|
1392
|
+
logger.debug(`[Security] Blocked search result from ${result.url} due to high-severity injection pattern: ${injectionResult.detections[0]?.description}`);
|
|
1393
|
+
return null;
|
|
1394
|
+
}
|
|
1395
|
+
let safeContent = addInjectionWarnings(result.content, injectionResult);
|
|
1396
|
+
safeContent = wrapExternalContent(safeContent, result.url);
|
|
1397
|
+
return {
|
|
1398
|
+
...result,
|
|
1399
|
+
content: safeContent,
|
|
1400
|
+
security: {
|
|
1401
|
+
isExternalContent: true,
|
|
1402
|
+
injectionDetected: injectionResult.hasInjection,
|
|
1403
|
+
injectionSeverity: injectionResult.maxSeverity,
|
|
1404
|
+
detectionCount: injectionResult.detections.length,
|
|
1405
|
+
},
|
|
1406
|
+
};
|
|
1407
|
+
})
|
|
1408
|
+
.filter((result) => result !== null);
|
|
1409
|
+
const response = {
|
|
1410
|
+
results: safeResults,
|
|
1411
|
+
collection: name,
|
|
1412
|
+
};
|
|
1413
|
+
if (blockedCount > 0) {
|
|
1414
|
+
response.securityNotice = `${blockedCount} result(s) were blocked due to high-severity prompt injection patterns.`;
|
|
1415
|
+
}
|
|
1416
|
+
return {
|
|
1417
|
+
content: [
|
|
1418
|
+
{
|
|
1419
|
+
type: 'text',
|
|
1420
|
+
text: JSON.stringify(response, null, 2),
|
|
1421
|
+
},
|
|
1422
|
+
],
|
|
1423
|
+
};
|
|
1424
|
+
}
|
|
1425
|
+
async indexAndAdd(id, url, title, reIndex = false, signal, pathPrefix, authInfo, tags, version) {
|
|
945
1426
|
// Helper to check if operation was cancelled
|
|
946
1427
|
const checkCancelled = () => {
|
|
947
1428
|
if (signal?.aborted) {
|
|
@@ -1092,6 +1573,7 @@ IMPORTANT: Before calling this tool, ask the user if they want to restrict crawl
|
|
|
1092
1573
|
lastIndexed: new Date(),
|
|
1093
1574
|
requiresAuth: authInfo?.requiresAuth,
|
|
1094
1575
|
authDomain: authInfo?.authDomain,
|
|
1576
|
+
version,
|
|
1095
1577
|
},
|
|
1096
1578
|
chunks: chunks.map((chunk, i) => ({
|
|
1097
1579
|
...chunk,
|