@cosmocoder/mcp-web-docs 1.3.0 → 1.4.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -38,6 +38,7 @@ AI assistants struggle with documentation:
38
38
 
39
39
  - **🌐 Universal Crawler** - Works with any documentation site, not just GitHub
40
40
  - **🔍 Hybrid Search** - Combines full-text search (FTS) with semantic vector search
41
+ - **📂 Collections** - Group related docs into named collections for project-based organization
41
42
  - **🏷️ Tags & Categories** - Organize docs with tags and filter searches by project, team, or category
42
43
  - **📦 Version Support** - Index multiple versions of the same package (e.g., React 18 and 19)
43
44
  - **🔐 Authentication Support** - Crawl private/protected docs with interactive browser login (auto-detects your default browser)
@@ -390,6 +391,95 @@ Delete an indexed documentation site and all its data.
390
391
 
391
392
  Clear saved authentication session for a domain.
392
393
 
394
+ ### Collection Tools
395
+
396
+ Collections let you group related documentation sites together for project-based organization. Unlike tags (which categorize individual docs), collections create named workspaces like "My React Project" containing React + Next.js + TypeScript docs.
397
+
398
+ #### `create_collection`
399
+
400
+ Create a new collection to group documentation sites.
401
+
402
+ ```typescript
403
+ create_collection({
404
+ name: "My React Project",
405
+ description: "React, Next.js, and TypeScript docs for my project" // Optional
406
+ })
407
+ ```
408
+
409
+ #### `add_to_collection`
410
+
411
+ Add documentation sites to a collection. The sites must already be indexed, and a single call accepts up to 50 URLs.
412
+
413
+ ```typescript
414
+ add_to_collection({
415
+ name: "My React Project",
416
+ urls: [
417
+ "https://react.dev/",
418
+ "https://nextjs.org/docs/",
419
+ "https://www.typescriptlang.org/docs/"
420
+ ]
421
+ })
422
+ ```
423
+
424
+ #### `search_collection`
425
+
426
+ Search within a specific collection. Uses the same hybrid search as `search_documentation` but limited to docs in the collection.
427
+
428
+ ```typescript
429
+ search_collection({
430
+ name: "My React Project",
431
+ query: "server components data fetching",
432
+ limit: 10 // Optional (defaults to 10)
433
+ })
434
+ ```
435
+
436
+ #### `list_collections`
437
+
438
+ List all collections with their document counts.
439
+
440
+ #### `get_collection`
441
+
442
+ Get details of a specific collection including all its documentation sites.
443
+
444
+ ```typescript
445
+ get_collection({
446
+ name: "My React Project"
447
+ })
448
+ ```
449
+
450
+ #### `update_collection`
451
+
452
+ Rename a collection or update its description. At least one of `newName` or `description` must be provided.
453
+
454
+ ```typescript
455
+ update_collection({
456
+ name: "My React Project",
457
+ newName: "Frontend Stack", // Optional
458
+ description: "Updated description" // Optional
459
+ })
460
+ ```
461
+
462
+ #### `remove_from_collection`
463
+
464
+ Remove documentation sites from a collection. The sites remain indexed, just removed from the collection.
465
+
466
+ ```typescript
467
+ remove_from_collection({
468
+ name: "My React Project",
469
+ urls: ["https://old-library.dev/docs/"]
470
+ })
471
+ ```
472
+
473
+ #### `delete_collection`
474
+
475
+ Delete a collection. The documentation sites in the collection are **not** deleted, only the collection grouping.
476
+
477
+ ```typescript
478
+ delete_collection({
479
+ name: "Old Project"
480
+ })
481
+ ```
482
+
393
483
  ---
394
484
 
395
485
  ## 💡 Tips
@@ -471,6 +561,48 @@ search_documentation({
471
561
 
472
562
  You can also add tags to existing documentation with `set_tags`.
473
563
 
564
+ ### Using Collections for Project Organization
565
+
566
+ Collections provide a higher-level grouping than tags — they let you organize documentation by project or context, making it easy to switch between different work contexts.
567
+
568
+ **Create a collection for your project:**
569
+ ```typescript
570
+ create_collection({
571
+ name: "E-commerce Backend",
572
+ description: "All docs for the backend rewrite project"
573
+ })
574
+ ```
575
+
576
+ **Add relevant documentation:**
577
+ ```typescript
578
+ add_to_collection({
579
+ name: "E-commerce Backend",
580
+ urls: [
581
+ "https://fastapi.tiangolo.com/",
582
+ "https://docs.sqlalchemy.org/",
583
+ "https://redis.io/docs/"
584
+ ]
585
+ })
586
+ ```
587
+
588
+ **Search within your project context:**
589
+ ```typescript
590
+ search_collection({
591
+ name: "E-commerce Backend",
592
+ query: "connection pooling best practices"
593
+ })
594
+ ```
595
+
596
+ **Collections vs Tags:**
597
+ | Feature | Collections | Tags |
598
+ |---------|-------------|------|
599
+ | Purpose | Group docs as a project/workspace | Categorize individual docs |
600
+ | Structure | Named container with multiple docs | Labels on individual docs |
601
+ | Use case | "My React Project" with React + Next.js + TS | "This doc is about React" |
602
+ | Searching | `search_collection` for focused results | `tags` filter in `search_documentation` |
603
+
604
+ You can use both together — a document can have tags AND belong to multiple collections.
605
+
474
606
  ### Versioning Package Documentation
475
607
 
476
608
  When indexing documentation for versioned packages (React, Vue, Python libraries, etc.), you can specify the version to track which version you've indexed:
package/build/index.js CHANGED
@@ -23,7 +23,7 @@ import { AuthManager } from './crawler/auth.js';
23
23
  import { fetchFavicon } from './util/favicon.js';
24
24
  import { generateDocId } from './util/docs.js';
25
25
  import { logger } from './util/logger.js';
26
- import { StorageStateSchema, safeJsonParse, validateToolArgs, sanitizeErrorMessage, detectPromptInjection, wrapExternalContent, addInjectionWarnings, SessionExpiredError, AddDocumentationArgsSchema, AuthenticateArgsSchema, ClearAuthArgsSchema, SearchDocumentationArgsSchema, ReindexDocumentationArgsSchema, DeleteDocumentationArgsSchema, SetTagsArgsSchema, } from './util/security.js';
26
+ import { StorageStateSchema, safeJsonParse, validateToolArgs, sanitizeErrorMessage, detectPromptInjection, wrapExternalContent, addInjectionWarnings, SessionExpiredError, AddDocumentationArgsSchema, AuthenticateArgsSchema, ClearAuthArgsSchema, SearchDocumentationArgsSchema, ReindexDocumentationArgsSchema, DeleteDocumentationArgsSchema, SetTagsArgsSchema, CreateCollectionArgsSchema, DeleteCollectionArgsSchema, UpdateCollectionArgsSchema, GetCollectionArgsSchema, AddToCollectionArgsSchema, RemoveFromCollectionArgsSchema, SearchCollectionArgsSchema, } from './util/security.js';
27
27
  class WebDocsServer {
28
28
  server;
29
29
  config;
@@ -375,6 +375,143 @@ Examples where version doesn't matter: "Company engineering handbook", "AWS cons
375
375
  properties: {},
376
376
  },
377
377
  },
378
+ // ============ Collection Tools ============
379
+ {
380
+ name: 'create_collection',
381
+ description: 'Create a new collection to group related documentation sites. Collections help organize docs by project or context (e.g., "My React Project" with React + Next.js + TypeScript docs).',
382
+ inputSchema: {
383
+ type: 'object',
384
+ properties: {
385
+ name: {
386
+ type: 'string',
387
+ description: 'Unique name for the collection (e.g., "My React Project", "Backend APIs")',
388
+ },
389
+ description: {
390
+ type: 'string',
391
+ description: 'Optional description of what this collection contains',
392
+ },
393
+ },
394
+ required: ['name'],
395
+ },
396
+ },
397
+ {
398
+ name: 'delete_collection',
399
+ description: 'Delete a collection. The documentation sites in the collection are NOT deleted, only the collection grouping.',
400
+ inputSchema: {
401
+ type: 'object',
402
+ properties: {
403
+ name: {
404
+ type: 'string',
405
+ description: 'Name of the collection to delete',
406
+ },
407
+ },
408
+ required: ['name'],
409
+ },
410
+ },
411
+ {
412
+ name: 'update_collection',
413
+ description: "Update a collection's name or description.",
414
+ inputSchema: {
415
+ type: 'object',
416
+ properties: {
417
+ name: {
418
+ type: 'string',
419
+ description: 'Current name of the collection',
420
+ },
421
+ newName: {
422
+ type: 'string',
423
+ description: 'Optional new name for the collection',
424
+ },
425
+ description: {
426
+ type: 'string',
427
+ description: 'Optional new description for the collection',
428
+ },
429
+ },
430
+ required: ['name'],
431
+ },
432
+ },
433
+ {
434
+ name: 'list_collections',
435
+ description: 'List all collections with their document counts. Use this to see available collections for context switching.',
436
+ inputSchema: {
437
+ type: 'object',
438
+ properties: {},
439
+ },
440
+ },
441
+ {
442
+ name: 'get_collection',
443
+ description: 'Get details of a specific collection including all its documentation sites.',
444
+ inputSchema: {
445
+ type: 'object',
446
+ properties: {
447
+ name: {
448
+ type: 'string',
449
+ description: 'Name of the collection',
450
+ },
451
+ },
452
+ required: ['name'],
453
+ },
454
+ },
455
+ {
456
+ name: 'add_to_collection',
457
+ description: 'Add one or more documentation sites to a collection. Sites must already be indexed.',
458
+ inputSchema: {
459
+ type: 'object',
460
+ properties: {
461
+ name: {
462
+ type: 'string',
463
+ description: 'Name of the collection',
464
+ },
465
+ urls: {
466
+ type: 'array',
467
+ items: { type: 'string' },
468
+ description: 'URLs of indexed documentation sites to add (max 50)',
469
+ },
470
+ },
471
+ required: ['name', 'urls'],
472
+ },
473
+ },
474
+ {
475
+ name: 'remove_from_collection',
476
+ description: 'Remove one or more documentation sites from a collection. The sites remain indexed, just removed from the collection.',
477
+ inputSchema: {
478
+ type: 'object',
479
+ properties: {
480
+ name: {
481
+ type: 'string',
482
+ description: 'Name of the collection',
483
+ },
484
+ urls: {
485
+ type: 'array',
486
+ items: { type: 'string' },
487
+ description: 'URLs of documentation sites to remove from the collection',
488
+ },
489
+ },
490
+ required: ['name', 'urls'],
491
+ },
492
+ },
493
+ {
494
+ name: 'search_collection',
495
+ description: 'Search for documentation within a specific collection. This is useful for focused searches within a project context. Uses the same hybrid search (full-text + semantic) as search_documentation.',
496
+ inputSchema: {
497
+ type: 'object',
498
+ properties: {
499
+ name: {
500
+ type: 'string',
501
+ description: 'Name of the collection to search in',
502
+ },
503
+ query: {
504
+ type: 'string',
505
+ description: 'Search query - be specific and include unique terms',
506
+ },
507
+ limit: {
508
+ type: 'number',
509
+ description: 'Maximum number of results (default: 10)',
510
+ },
511
+ },
512
+ required: ['name', 'query'],
513
+ },
514
+ },
378
515
  ],
379
516
  }));
380
517
  // Handle tool calls
@@ -404,6 +541,23 @@ Examples where version doesn't matter: "Company engineering handbook", "AWS cons
404
541
  return this.handleSetTags(request.params.arguments);
405
542
  case 'list_tags':
406
543
  return this.handleListTags();
544
+ // Collection handlers
545
+ case 'create_collection':
546
+ return this.handleCreateCollection(request.params.arguments);
547
+ case 'delete_collection':
548
+ return this.handleDeleteCollection(request.params.arguments);
549
+ case 'update_collection':
550
+ return this.handleUpdateCollection(request.params.arguments);
551
+ case 'list_collections':
552
+ return this.handleListCollections();
553
+ case 'get_collection':
554
+ return this.handleGetCollection(request.params.arguments);
555
+ case 'add_to_collection':
556
+ return this.handleAddToCollection(request.params.arguments);
557
+ case 'remove_from_collection':
558
+ return this.handleRemoveFromCollection(request.params.arguments);
559
+ case 'search_collection':
560
+ return this.handleSearchCollection(request.params.arguments);
407
561
  default:
408
562
  throw new McpError(ErrorCode.MethodNotFound, `Unknown tool: ${request.params.name}`);
409
563
  }
@@ -956,6 +1110,318 @@ Examples where version doesn't matter: "Company engineering handbook", "AWS cons
956
1110
  ],
957
1111
  };
958
1112
  }
1113
+ // ============ Collection Handlers ============
1114
+ /**
1115
+ * Handle creating a new collection
1116
+ */
1117
+ async handleCreateCollection(args) {
1118
+ let validatedArgs;
1119
+ try {
1120
+ validatedArgs = validateToolArgs(args, CreateCollectionArgsSchema);
1121
+ }
1122
+ catch (error) {
1123
+ throw new McpError(ErrorCode.InvalidParams, sanitizeErrorMessage(error));
1124
+ }
1125
+ const { name, description } = validatedArgs;
1126
+ try {
1127
+ await this.store.createCollection(name, description);
1128
+ return {
1129
+ content: [
1130
+ {
1131
+ type: 'text',
1132
+ text: JSON.stringify({
1133
+ status: 'success',
1134
+ message: `Collection "${name}" created successfully`,
1135
+ collection: {
1136
+ name,
1137
+ description,
1138
+ },
1139
+ }, null, 2),
1140
+ },
1141
+ ],
1142
+ };
1143
+ }
1144
+ catch (error) {
1145
+ const safeMessage = sanitizeErrorMessage(error);
1146
+ if (safeMessage.includes('already exists')) {
1147
+ throw new McpError(ErrorCode.InvalidParams, safeMessage);
1148
+ }
1149
+ throw new McpError(ErrorCode.InternalError, `Failed to create collection: ${safeMessage}`);
1150
+ }
1151
+ }
1152
+ /**
1153
+ * Handle deleting a collection
1154
+ */
1155
+ async handleDeleteCollection(args) {
1156
+ let validatedArgs;
1157
+ try {
1158
+ validatedArgs = validateToolArgs(args, DeleteCollectionArgsSchema);
1159
+ }
1160
+ catch (error) {
1161
+ throw new McpError(ErrorCode.InvalidParams, sanitizeErrorMessage(error));
1162
+ }
1163
+ const { name } = validatedArgs;
1164
+ try {
1165
+ await this.store.deleteCollection(name);
1166
+ return {
1167
+ content: [
1168
+ {
1169
+ type: 'text',
1170
+ text: JSON.stringify({
1171
+ status: 'success',
1172
+ message: `Collection "${name}" deleted. Documentation sites remain indexed.`,
1173
+ }, null, 2),
1174
+ },
1175
+ ],
1176
+ };
1177
+ }
1178
+ catch (error) {
1179
+ const safeMessage = sanitizeErrorMessage(error);
1180
+ if (safeMessage.includes('not found')) {
1181
+ throw new McpError(ErrorCode.InvalidParams, safeMessage);
1182
+ }
1183
+ throw new McpError(ErrorCode.InternalError, `Failed to delete collection: ${safeMessage}`);
1184
+ }
1185
+ }
1186
+ /**
1187
+ * Handle updating a collection's metadata
1188
+ */
1189
+ async handleUpdateCollection(args) {
1190
+ let validatedArgs;
1191
+ try {
1192
+ validatedArgs = validateToolArgs(args, UpdateCollectionArgsSchema);
1193
+ }
1194
+ catch (error) {
1195
+ throw new McpError(ErrorCode.InvalidParams, sanitizeErrorMessage(error));
1196
+ }
1197
+ const { name, newName, description } = validatedArgs;
1198
+ // Must provide at least one field to update
1199
+ if (newName === undefined && description === undefined) {
1200
+ throw new McpError(ErrorCode.InvalidParams, 'Must provide newName or description to update');
1201
+ }
1202
+ try {
1203
+ await this.store.updateCollection(name, { newName, description });
1204
+ return {
1205
+ content: [
1206
+ {
1207
+ type: 'text',
1208
+ text: JSON.stringify({
1209
+ status: 'success',
1210
+ message: `Collection updated successfully`,
1211
+ collection: {
1212
+ name: newName ?? name,
1213
+ description,
1214
+ },
1215
+ }, null, 2),
1216
+ },
1217
+ ],
1218
+ };
1219
+ }
1220
+ catch (error) {
1221
+ const safeMessage = sanitizeErrorMessage(error);
1222
+ if (safeMessage.includes('not found') || safeMessage.includes('already exists')) {
1223
+ throw new McpError(ErrorCode.InvalidParams, safeMessage);
1224
+ }
1225
+ throw new McpError(ErrorCode.InternalError, `Failed to update collection: ${safeMessage}`);
1226
+ }
1227
+ }
1228
+ /**
1229
+ * Handle listing all collections
1230
+ */
1231
+ async handleListCollections() {
1232
+ const collections = await this.store.listCollections();
1233
+ return {
1234
+ content: [
1235
+ {
1236
+ type: 'text',
1237
+ text: JSON.stringify({
1238
+ collections,
1239
+ total: collections.length,
1240
+ }, null, 2),
1241
+ },
1242
+ ],
1243
+ };
1244
+ }
1245
+ /**
1246
+ * Handle getting a specific collection with its documents
1247
+ */
1248
+ async handleGetCollection(args) {
1249
+ let validatedArgs;
1250
+ try {
1251
+ validatedArgs = validateToolArgs(args, GetCollectionArgsSchema);
1252
+ }
1253
+ catch (error) {
1254
+ throw new McpError(ErrorCode.InvalidParams, sanitizeErrorMessage(error));
1255
+ }
1256
+ const { name } = validatedArgs;
1257
+ const collection = await this.store.getCollection(name);
1258
+ if (!collection) {
1259
+ throw new McpError(ErrorCode.InvalidParams, `Collection "${name}" not found`);
1260
+ }
1261
+ return {
1262
+ content: [
1263
+ {
1264
+ type: 'text',
1265
+ text: JSON.stringify(collection, null, 2),
1266
+ },
1267
+ ],
1268
+ };
1269
+ }
1270
+ /**
1271
+ * Handle adding documents to a collection
1272
+ */
1273
+ async handleAddToCollection(args) {
1274
+ let validatedArgs;
1275
+ try {
1276
+ validatedArgs = validateToolArgs(args, AddToCollectionArgsSchema);
1277
+ }
1278
+ catch (error) {
1279
+ throw new McpError(ErrorCode.InvalidParams, sanitizeErrorMessage(error));
1280
+ }
1281
+ const { name, urls } = validatedArgs;
1282
+ // Normalize URLs
1283
+ const normalizedUrls = urls.map((url) => normalizeUrl(url));
1284
+ try {
1285
+ const result = await this.store.addToCollection(name, normalizedUrls);
1286
+ return {
1287
+ content: [
1288
+ {
1289
+ type: 'text',
1290
+ text: JSON.stringify({
1291
+ status: 'success',
1292
+ message: `Added ${result.added.length} document(s) to collection "${name}"`,
1293
+ ...result,
1294
+ }, null, 2),
1295
+ },
1296
+ ],
1297
+ };
1298
+ }
1299
+ catch (error) {
1300
+ const safeMessage = sanitizeErrorMessage(error);
1301
+ if (safeMessage.includes('not found')) {
1302
+ throw new McpError(ErrorCode.InvalidParams, safeMessage);
1303
+ }
1304
+ throw new McpError(ErrorCode.InternalError, `Failed to add to collection: ${safeMessage}`);
1305
+ }
1306
+ }
1307
+ /**
1308
+ * Handle removing documents from a collection
1309
+ */
1310
+ async handleRemoveFromCollection(args) {
1311
+ let validatedArgs;
1312
+ try {
1313
+ validatedArgs = validateToolArgs(args, RemoveFromCollectionArgsSchema);
1314
+ }
1315
+ catch (error) {
1316
+ throw new McpError(ErrorCode.InvalidParams, sanitizeErrorMessage(error));
1317
+ }
1318
+ const { name, urls } = validatedArgs;
1319
+ // Normalize URLs
1320
+ const normalizedUrls = urls.map((url) => normalizeUrl(url));
1321
+ try {
1322
+ const result = await this.store.removeFromCollection(name, normalizedUrls);
1323
+ return {
1324
+ content: [
1325
+ {
1326
+ type: 'text',
1327
+ text: JSON.stringify({
1328
+ status: 'success',
1329
+ message: `Removed ${result.removed.length} document(s) from collection "${name}"`,
1330
+ ...result,
1331
+ }, null, 2),
1332
+ },
1333
+ ],
1334
+ };
1335
+ }
1336
+ catch (error) {
1337
+ const safeMessage = sanitizeErrorMessage(error);
1338
+ if (safeMessage.includes('not found')) {
1339
+ throw new McpError(ErrorCode.InvalidParams, safeMessage);
1340
+ }
1341
+ throw new McpError(ErrorCode.InternalError, `Failed to remove from collection: ${safeMessage}`);
1342
+ }
1343
+ }
1344
+ /**
1345
+ * Handle searching within a collection
1346
+ */
1347
+ async handleSearchCollection(args) {
1348
+ let validatedArgs;
1349
+ try {
1350
+ validatedArgs = validateToolArgs(args, SearchCollectionArgsSchema);
1351
+ }
1352
+ catch (error) {
1353
+ throw new McpError(ErrorCode.InvalidParams, sanitizeErrorMessage(error));
1354
+ }
1355
+ const { name, query, limit = 10 } = validatedArgs;
1356
+ // Get URLs in the collection
1357
+ const collectionUrls = await this.store.getCollectionUrls(name);
1358
+ if (collectionUrls.length === 0) {
1359
+ // Check if collection exists but is empty
1360
+ const collection = await this.store.getCollection(name);
1361
+ if (!collection) {
1362
+ throw new McpError(ErrorCode.InvalidParams, `Collection "${name}" not found`);
1363
+ }
1364
+ return {
1365
+ content: [
1366
+ {
1367
+ type: 'text',
1368
+ text: JSON.stringify({
1369
+ results: [],
1370
+ message: `Collection "${name}" is empty. Add documentation sites to search.`,
1371
+ }, null, 2),
1372
+ },
1373
+ ],
1374
+ };
1375
+ }
1376
+ // Search using the existing searchByText but filter by collection URLs
1377
+ // We'll use a custom approach: search all then filter
1378
+ // This is less efficient than a native filter, but works with existing infrastructure
1379
+ const allResults = await this.store.searchByText(query, { limit: limit * 3 }); // Get more results to filter
1380
+ // Filter to only include results from collection URLs
1381
+ const collectionUrlSet = new Set(collectionUrls);
1382
+ let filteredResults = allResults.filter((result) => collectionUrlSet.has(result.url));
1383
+ // Apply limit
1384
+ filteredResults = filteredResults.slice(0, limit);
1385
+ // Apply prompt injection detection and filter/process results (same as handleSearchDocumentation)
1386
+ let blockedCount = 0;
1387
+ const safeResults = filteredResults
1388
+ .map((result) => {
1389
+ const injectionResult = detectPromptInjection(result.content);
1390
+ if (injectionResult.maxSeverity === 'high') {
1391
+ blockedCount++;
1392
+ logger.debug(`[Security] Blocked search result from ${result.url} due to high-severity injection pattern: ${injectionResult.detections[0]?.description}`);
1393
+ return null;
1394
+ }
1395
+ let safeContent = addInjectionWarnings(result.content, injectionResult);
1396
+ safeContent = wrapExternalContent(safeContent, result.url);
1397
+ return {
1398
+ ...result,
1399
+ content: safeContent,
1400
+ security: {
1401
+ isExternalContent: true,
1402
+ injectionDetected: injectionResult.hasInjection,
1403
+ injectionSeverity: injectionResult.maxSeverity,
1404
+ detectionCount: injectionResult.detections.length,
1405
+ },
1406
+ };
1407
+ })
1408
+ .filter((result) => result !== null);
1409
+ const response = {
1410
+ results: safeResults,
1411
+ collection: name,
1412
+ };
1413
+ if (blockedCount > 0) {
1414
+ response.securityNotice = `${blockedCount} result(s) were blocked due to high-severity prompt injection patterns.`;
1415
+ }
1416
+ return {
1417
+ content: [
1418
+ {
1419
+ type: 'text',
1420
+ text: JSON.stringify(response, null, 2),
1421
+ },
1422
+ ],
1423
+ };
1424
+ }
959
1425
  async indexAndAdd(id, url, title, reIndex = false, signal, pathPrefix, authInfo, tags, version) {
960
1426
  // Helper to check if operation was cancelled
961
1427
  const checkCancelled = () => {