webcontext-ai 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +583 -0
- package/dist/browser/manager.d.ts +47 -0
- package/dist/browser/manager.d.ts.map +1 -0
- package/dist/browser/manager.js +215 -0
- package/dist/browser/manager.js.map +1 -0
- package/dist/cache/cache.d.ts +22 -0
- package/dist/cache/cache.d.ts.map +1 -0
- package/dist/cache/cache.js +150 -0
- package/dist/cache/cache.js.map +1 -0
- package/dist/chunking/chunker.d.ts +26 -0
- package/dist/chunking/chunker.d.ts.map +1 -0
- package/dist/chunking/chunker.js +208 -0
- package/dist/chunking/chunker.js.map +1 -0
- package/dist/cli/index.d.ts +3 -0
- package/dist/cli/index.d.ts.map +1 -0
- package/dist/cli/index.js +406 -0
- package/dist/cli/index.js.map +1 -0
- package/dist/core/pipeline.d.ts +35 -0
- package/dist/core/pipeline.d.ts.map +1 -0
- package/dist/core/pipeline.js +476 -0
- package/dist/core/pipeline.js.map +1 -0
- package/dist/core/stream.d.ts +48 -0
- package/dist/core/stream.d.ts.map +1 -0
- package/dist/core/stream.js +72 -0
- package/dist/core/stream.js.map +1 -0
- package/dist/core/types.d.ts +259 -0
- package/dist/core/types.d.ts.map +1 -0
- package/dist/core/types.js +4 -0
- package/dist/core/types.js.map +1 -0
- package/dist/export/index.d.ts +3 -0
- package/dist/export/index.d.ts.map +1 -0
- package/dist/export/index.js +8 -0
- package/dist/export/index.js.map +1 -0
- package/dist/export/templates.d.ts +25 -0
- package/dist/export/templates.d.ts.map +1 -0
- package/dist/export/templates.js +76 -0
- package/dist/export/templates.js.map +1 -0
- package/dist/export/vectordb.d.ts +21 -0
- package/dist/export/vectordb.d.ts.map +1 -0
- package/dist/export/vectordb.js +101 -0
- package/dist/export/vectordb.js.map +1 -0
- package/dist/extractors/content.d.ts +23 -0
- package/dist/extractors/content.d.ts.map +1 -0
- package/dist/extractors/content.js +328 -0
- package/dist/extractors/content.js.map +1 -0
- package/dist/extractors/github.d.ts +19 -0
- package/dist/extractors/github.d.ts.map +1 -0
- package/dist/extractors/github.js +150 -0
- package/dist/extractors/github.js.map +1 -0
- package/dist/extractors/images.d.ts +20 -0
- package/dist/extractors/images.d.ts.map +1 -0
- package/dist/extractors/images.js +73 -0
- package/dist/extractors/images.js.map +1 -0
- package/dist/extractors/pdf.d.ts +11 -0
- package/dist/extractors/pdf.d.ts.map +1 -0
- package/dist/extractors/pdf.js +107 -0
- package/dist/extractors/pdf.js.map +1 -0
- package/dist/extractors/screenshot.d.ts +21 -0
- package/dist/extractors/screenshot.d.ts.map +1 -0
- package/dist/extractors/screenshot.js +85 -0
- package/dist/extractors/screenshot.js.map +1 -0
- package/dist/index.d.ts +70 -0
- package/dist/index.d.ts.map +1 -0
- package/dist/index.js +206 -0
- package/dist/index.js.map +1 -0
- package/dist/mcp-server.d.ts +3 -0
- package/dist/mcp-server.d.ts.map +1 -0
- package/dist/mcp-server.js +108 -0
- package/dist/mcp-server.js.map +1 -0
- package/dist/sdk/client.d.ts +48 -0
- package/dist/sdk/client.d.ts.map +1 -0
- package/dist/sdk/client.js +120 -0
- package/dist/sdk/client.js.map +1 -0
- package/dist/sdk/mcp.d.ts +12 -0
- package/dist/sdk/mcp.d.ts.map +1 -0
- package/dist/sdk/mcp.js +146 -0
- package/dist/sdk/mcp.js.map +1 -0
- package/dist/sdk/server.d.ts +5 -0
- package/dist/sdk/server.d.ts.map +1 -0
- package/dist/sdk/server.js +158 -0
- package/dist/sdk/server.js.map +1 -0
- package/dist/search/vector.d.ts +26 -0
- package/dist/search/vector.d.ts.map +1 -0
- package/dist/search/vector.js +142 -0
- package/dist/search/vector.js.map +1 -0
- package/dist/transformers/markdown.d.ts +21 -0
- package/dist/transformers/markdown.d.ts.map +1 -0
- package/dist/transformers/markdown.js +242 -0
- package/dist/transformers/markdown.js.map +1 -0
- package/dist/utils/dedup.d.ts +20 -0
- package/dist/utils/dedup.d.ts.map +1 -0
- package/dist/utils/dedup.js +61 -0
- package/dist/utils/dedup.js.map +1 -0
- package/dist/utils/index.d.ts +6 -0
- package/dist/utils/index.d.ts.map +1 -0
- package/dist/utils/index.js +15 -0
- package/dist/utils/index.js.map +1 -0
- package/dist/utils/metrics.d.ts +16 -0
- package/dist/utils/metrics.d.ts.map +1 -0
- package/dist/utils/metrics.js +28 -0
- package/dist/utils/metrics.js.map +1 -0
- package/dist/utils/scheduler.d.ts +19 -0
- package/dist/utils/scheduler.d.ts.map +1 -0
- package/dist/utils/scheduler.js +63 -0
- package/dist/utils/scheduler.js.map +1 -0
- package/dist/utils/sitemap.d.ts +17 -0
- package/dist/utils/sitemap.d.ts.map +1 -0
- package/dist/utils/sitemap.js +118 -0
- package/dist/utils/sitemap.js.map +1 -0
- package/dist/utils/validation.d.ts +142 -0
- package/dist/utils/validation.d.ts.map +1 -0
- package/dist/utils/validation.js +35 -0
- package/dist/utils/validation.js.map +1 -0
- package/dist/utils/webhook.d.ts +21 -0
- package/dist/utils/webhook.d.ts.map +1 -0
- package/dist/utils/webhook.js +108 -0
- package/dist/utils/webhook.js.map +1 -0
- package/package.json +109 -0
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"pdf.d.ts","sourceRoot":"","sources":["../../src/extractors/pdf.ts"],"names":[],"mappings":"AACA,OAAO,EAAE,gBAAgB,EAAW,MAAM,eAAe,CAAC;AAE1D;;GAEG;AACH,qBAAa,YAAY;IACvB,OAAO,CAAC,QAAQ,CAAa;YAEf,UAAU;IAclB,OAAO,CAAC,MAAM,EAAE,MAAM,GAAG,OAAO,CAAC,gBAAgB,CAAC;IAuDxD,KAAK,CAAC,GAAG,EAAE,MAAM,GAAG,OAAO;CAK5B"}
|
|
@@ -0,0 +1,107 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
// TypeScript module-interop helpers. Each one reuses an implementation already
// present on `this` when there is one, otherwise it installs the local fallback.

/**
 * Exposes property `k` of module `m` on object `o` under the name `k2`
 * (`k2` defaults to `k`).
 */
var __createBinding = (this && this.__createBinding) || (function () {
    if (!Object.create) {
        // No Object.create available: fall back to a plain value copy.
        return function (o, m, k, k2) {
            var exportedAs = k2 === undefined ? k : k2;
            o[exportedAs] = m[k];
        };
    }
    return function (o, m, k, k2) {
        var exportedAs = k2 === undefined ? k : k2;
        var descriptor = Object.getOwnPropertyDescriptor(m, k);
        var needsGetter;
        if (!descriptor) {
            needsGetter = true;
        }
        else if ("get" in descriptor) {
            needsGetter = !m.__esModule;
        }
        else {
            needsGetter = descriptor.writable || descriptor.configurable;
        }
        if (needsGetter) {
            // Forward reads to the source module so later changes stay visible.
            descriptor = { enumerable: true, get: function () { return m[k]; } };
        }
        Object.defineProperty(o, exportedAs, descriptor);
    };
})();

/**
 * Stores `v` as the enumerable `default` member of namespace object `o`.
 */
var __setModuleDefault = (this && this.__setModuleDefault) || (function () {
    if (Object.create) {
        return function (o, v) {
            Object.defineProperty(o, "default", { enumerable: true, value: v });
        };
    }
    return function (o, v) {
        o["default"] = v;
    };
})();

/**
 * Wraps a required module in a namespace object: modules flagged `__esModule`
 * are returned untouched; anything else gets its own properties re-bound onto a
 * fresh object whose `default` member is the module itself.
 */
var __importStar = (this && this.__importStar) || function (mod) {
    if (mod && mod.__esModule) {
        return mod;
    }
    var namespace = {};
    if (mod != null) {
        for (var key in mod) {
            if (key === "default") {
                continue;
            }
            if (!Object.prototype.hasOwnProperty.call(mod, key)) {
                continue;
            }
            __createBinding(namespace, mod, key);
        }
    }
    __setModuleDefault(namespace, mod);
    return namespace;
};
|
|
25
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
26
|
+
exports.PdfExtractor = void 0;
|
|
27
|
+
const fs_1 = require("fs");
|
|
28
|
+
/**
 * PDF content extractor. Requires optional dependency: npm install pdf-parse
 *
 * Reads a PDF from an http(s) URL or a local path and returns a result object
 * holding the title, markdown, plain text, detected headings and metadata.
 */
class PdfExtractor {
    /** Cached `pdf-parse` entry point; stays `null` until the first successful load. */
    pdfParse = null;

    /**
     * Tells whether `source` is a remote location (`http://` or `https://` prefix).
     * @param {string} source
     * @returns {boolean}
     */
    static #isRemote(source) {
        return source.startsWith('http://') || source.startsWith('https://');
    }

    /**
     * Derives a title from the last path segment of `source`.
     * For URLs the query string and fragment are dropped first, and a trailing
     * `.pdf` extension is removed regardless of letter case.
     * @param {string} source
     * @returns {string} Possibly empty; the caller supplies the final default.
     */
    static #fallbackTitle(source) {
        const location = PdfExtractor.#isRemote(source)
            ? source.split(/[?#]/, 1)[0]
            : source;
        const lastSegment = location.split('/').pop() ?? '';
        return lastSegment.replace(/\.pdf$/i, '');
    }

    /**
     * Picks out lines that look like headings: short, not ending in a period,
     * and either numbered ("1. Intro", "2.1 Scope") or written in capitals.
     * Numbered headings get a level equal to their numbering depth (capped at 6);
     * capitalised headings are level 1.
     * @param {string} text Plain text of the document.
     * @returns {{ level: number, text: string }[]}
     */
    static #detectHeadings(text) {
        // "1. Title" needs the trailing dot; "2.1 Title" / "2.1. Title" do not.
        const numbered = /^(?:\d+\.|\d+(?:\.\d+)+\.?)\s+[A-Z]/;
        const allCaps = /^[A-Z][A-Z\s]{3,}$/;
        const headings = [];
        for (const line of text.split('\n')) {
            const trimmed = line.trim();
            if (!trimmed || trimmed.length > 100 || trimmed.endsWith('.')) {
                continue;
            }
            if (numbered.test(trimmed)) {
                const depth = trimmed.match(/^\d+(?:\.\d+)*/)[0].split('.').length;
                headings.push({ level: Math.min(depth, 6), text: trimmed });
            }
            else if (allCaps.test(trimmed)) {
                headings.push({ level: 1, text: trimmed });
            }
        }
        return headings;
    }

    /**
     * Loads `pdf-parse` on first use and caches it on the instance.
     * @returns {Promise<Function>} The parser function.
     * @throws {Error} When the module cannot be loaded; the original failure is
     *   attached as `cause`.
     */
    async loadParser() {
        if (this.pdfParse) {
            return this.pdfParse;
        }
        try {
            // @ts-ignore
            this.pdfParse = (await Promise.resolve().then(() => __importStar(require('pdf-parse')))).default;
            return this.pdfParse;
        }
        catch (err) {
            throw new Error('pdf-parse is required for PDF extraction but is not installed.\n' +
                'Install it with: npm install pdf-parse', { cause: err });
        }
    }

    /**
     * Extracts text, headings and metadata from a PDF.
     * @param {string} source An `http://` / `https://` URL, or a local file path.
     * @returns {Promise<object>} Result with `url`, `title`, `description`,
     *   `markdown`, `text`, `codeBlocks`, `headings`, `links`, `metadata` and
     *   `timestamp`.
     * @throws {Error} When the parser is unavailable, the download fails with a
     *   non-2xx status, or the local path does not exist.
     */
    async extract(source) {
        const parser = await this.loadParser();
        let buffer;
        if (PdfExtractor.#isRemote(source)) {
            // Abort downloads that take longer than 60 seconds.
            const response = await fetch(source, { signal: AbortSignal.timeout(60000) });
            if (!response.ok) {
                throw new Error(`Failed to fetch PDF: HTTP ${response.status}`);
            }
            buffer = Buffer.from(await response.arrayBuffer());
        }
        else if ((0, fs_1.existsSync)(source)) {
            buffer = (0, fs_1.readFileSync)(source);
        }
        else {
            throw new Error(`PDF source not found: ${source}`);
        }
        const data = await parser(buffer);
        const text = data.text || '';
        const title = data.info?.Title || PdfExtractor.#fallbackTitle(source) || 'Untitled PDF';
        const author = data.info?.Author;
        const pages = data.numpages || 0;
        const headings = PdfExtractor.#detectHeadings(text);
        // Convert to markdown
        let markdown = `# ${title}\n\n`;
        if (author) {
            markdown += `> Author: ${author}\n\n`;
        }
        markdown += `> ${pages} pages\n\n`;
        markdown += text;
        return {
            url: source,
            title,
            description: `PDF document: ${title} (${pages} pages)`,
            markdown,
            text,
            codeBlocks: [],
            headings,
            links: [],
            metadata: {
                author,
                type: 'documentation',
                tags: ['pdf'],
            },
            timestamp: new Date().toISOString(),
        };
    }

    /**
     * Heuristic check for whether `url` points at a PDF.
     * True when the path (ignoring any query string or fragment) or the whole
     * string ends in `.pdf`, or the string contains `/pdf/` or `application/pdf`.
     * @param {string} url
     * @returns {boolean}
     */
    isPdf(url) {
        const lower = url.toLowerCase();
        const pathOnly = lower.split(/[?#]/, 1)[0];
        return pathOnly.endsWith('.pdf') ||
            lower.endsWith('.pdf') ||
            url.includes('/pdf/') ||
            url.includes('application/pdf');
    }
}
|
|
106
|
+
exports.PdfExtractor = PdfExtractor;
|
|
107
|
+
//# sourceMappingURL=pdf.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"pdf.js","sourceRoot":"","sources":["../../src/extractors/pdf.ts"],"names":[],"mappings":";;;;;;;;;;;;;;;;;;;;;;;;;;AAAA,2BAA8C;AAG9C;;GAEG;AACH,MAAa,YAAY;IACf,QAAQ,GAAQ,IAAI,CAAC;IAErB,KAAK,CAAC,UAAU;QACtB,IAAI,IAAI,CAAC,QAAQ;YAAE,OAAO,IAAI,CAAC,QAAQ,CAAC;QACxC,IAAI,CAAC;YACH,aAAa;YACb,IAAI,CAAC,QAAQ,GAAG,CAAC,wDAAa,WAAW,GAAC,CAAC,CAAC,OAAO,CAAC;YACpD,OAAO,IAAI,CAAC,QAAQ,CAAC;QACvB,CAAC;QAAC,MAAM,CAAC;YACP,MAAM,IAAI,KAAK,CACb,kEAAkE;gBAClE,wCAAwC,CACzC,CAAC;QACJ,CAAC;IACH,CAAC;IAED,KAAK,CAAC,OAAO,CAAC,MAAc;QAC1B,MAAM,MAAM,GAAG,MAAM,IAAI,CAAC,UAAU,EAAE,CAAC;QACvC,IAAI,MAAc,CAAC;QAEnB,IAAI,MAAM,CAAC,UAAU,CAAC,SAAS,CAAC,IAAI,MAAM,CAAC,UAAU,CAAC,UAAU,CAAC,EAAE,CAAC;YAClE,MAAM,QAAQ,GAAG,MAAM,KAAK,CAAC,MAAM,EAAE,EAAE,MAAM,EAAE,WAAW,CAAC,OAAO,CAAC,KAAK,CAAC,EAAE,CAAC,CAAC;YAC7E,IAAI,CAAC,QAAQ,CAAC,EAAE;gBAAE,MAAM,IAAI,KAAK,CAAC,6BAA6B,QAAQ,CAAC,MAAM,EAAE,CAAC,CAAC;YAClF,MAAM,GAAG,MAAM,CAAC,IAAI,CAAC,MAAM,QAAQ,CAAC,WAAW,EAAE,CAAC,CAAC;QACrD,CAAC;aAAM,IAAI,IAAA,eAAU,EAAC,MAAM,CAAC,EAAE,CAAC;YAC9B,MAAM,GAAG,IAAA,iBAAY,EAAC,MAAM,CAAC,CAAC;QAChC,CAAC;aAAM,CAAC;YACN,MAAM,IAAI,KAAK,CAAC,yBAAyB,MAAM,EAAE,CAAC,CAAC;QACrD,CAAC;QAED,MAAM,IAAI,GAAG,MAAM,MAAM,CAAC,MAAM,CAAC,CAAC;QAClC,MAAM,IAAI,GAAW,IAAI,CAAC,IAAI,IAAI,EAAE,CAAC;QACrC,MAAM,KAAK,GAAG,IAAI,CAAC,IAAI,EAAE,KAAK,IAAI,MAAM,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC,GAAG,EAAE,EAAE,OAAO,CAAC,MAAM,EAAE,EAAE,CAAC,IAAI,cAAc,CAAC;QACjG,MAAM,MAAM,GAAG,IAAI,CAAC,IAAI,EAAE,MAAM,CAAC;QACjC,MAAM,KAAK,GAAG,IAAI,CAAC,QAAQ,IAAI,CAAC,CAAC;QAEjC,oGAAoG;QACpG,MAAM,QAAQ,GAAc,EAAE,CAAC;QAC/B,MAAM,KAAK,GAAG,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC;QAC/B,KAAK,MAAM,IAAI,IAAI,KAAK,EAAE,CAAC;YACzB,MAAM,OAAO,GAAG,IAAI,CAAC,IAAI,EAAE,CAAC;YAC5B,IAAI,CAAC,OAAO,IAAI,OAAO,CAAC,MAAM,GAAG,GAAG,IAAI,OAAO,CAAC,QAAQ,CAAC,GAAG,CAAC;gBAAE,SAAS;YACxE,IAAI,gBAAgB,CAAC,IAAI,CAAC,OAAO,CAAC,IAAI,oBAAoB,CAAC,IAAI,CAAC,OAAO,CAAC,EAAE,CAAC;gBACzE,QAAQ,CAAC,IAAI,CAAC,EAAE,KAAK,EAAE,WAAW,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,EAAE,IAAI,EAAE,O
AAO,EAAE,CAAC,CAAC;YAC7E,CAAC;QACH,CAAC;QAED,sBAAsB;QACtB,IAAI,QAAQ,GAAG,KAAK,KAAK,MAAM,CAAC;QAChC,IAAI,MAAM;YAAE,QAAQ,IAAI,aAAa,MAAM,MAAM,CAAC;QAClD,QAAQ,IAAI,KAAK,KAAK,YAAY,CAAC;QACnC,QAAQ,IAAI,IAAI,CAAC;QAEjB,OAAO;YACL,GAAG,EAAE,MAAM;YACX,KAAK;YACL,WAAW,EAAE,iBAAiB,KAAK,KAAK,KAAK,SAAS;YACtD,QAAQ;YACR,IAAI;YACJ,UAAU,EAAE,EAAE;YACd,QAAQ;YACR,KAAK,EAAE,EAAE;YACT,QAAQ,EAAE;gBACR,MAAM;gBACN,IAAI,EAAE,eAAe;gBACrB,IAAI,EAAE,CAAC,KAAK,CAAC;aACd;YACD,SAAS,EAAE,IAAI,IAAI,EAAE,CAAC,WAAW,EAAE;SACpC,CAAC;IACJ,CAAC;IAED,KAAK,CAAC,GAAW;QACf,OAAO,GAAG,CAAC,WAAW,EAAE,CAAC,QAAQ,CAAC,MAAM,CAAC;YACvC,GAAG,CAAC,QAAQ,CAAC,OAAO,CAAC;YACrB,GAAG,CAAC,QAAQ,CAAC,iBAAiB,CAAC,CAAC;IACpC,CAAC;CACF;AA7ED,oCA6EC"}
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
/**
 * Screenshot capture for web pages. Requires Playwright (optional dependency).
 */
export declare class ScreenshotCapture {
    private outputDir;
    /**
     * @param outputDir Directory screenshots are written to.
     */
    constructor(outputDir?: string);
    /**
     * Captures one page and resolves with the path of the written image file.
     * @param url Page to capture.
     * @param options Viewport size, full-page flag, image format and JPEG quality.
     */
    capture(url: string, options?: {
        fullPage?: boolean;
        width?: number;
        height?: number;
        format?: 'png' | 'jpeg';
        quality?: number;
    }): Promise<string>;
    /**
     * Captures several pages; the result has one entry per input URL.
     * `options` is forwarded unchanged to `capture`, so it accepts the same
     * fields, including `quality`.
     * @param urls Pages to capture.
     * @param options Same options as `capture`.
     */
    captureMultiple(urls: string[], options?: {
        fullPage?: boolean;
        width?: number;
        height?: number;
        format?: 'png' | 'jpeg';
        quality?: number;
    }): Promise<string[]>;
}
//# sourceMappingURL=screenshot.d.ts.map
|
|
21
|
+
//# sourceMappingURL=screenshot.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"screenshot.d.ts","sourceRoot":"","sources":["../../src/extractors/screenshot.ts"],"names":[],"mappings":"AAGA;;GAEG;AACH,qBAAa,iBAAiB;IAC5B,OAAO,CAAC,SAAS,CAAS;gBAEd,SAAS,GAAE,MAAwB;IAIzC,OAAO,CAAC,GAAG,EAAE,MAAM,EAAE,OAAO,GAAE;QAClC,QAAQ,CAAC,EAAE,OAAO,CAAC;QACnB,KAAK,CAAC,EAAE,MAAM,CAAC;QACf,MAAM,CAAC,EAAE,MAAM,CAAC;QAChB,MAAM,CAAC,EAAE,KAAK,GAAG,MAAM,CAAC;QACxB,OAAO,CAAC,EAAE,MAAM,CAAC;KACb,GAAG,OAAO,CAAC,MAAM,CAAC;IAuClB,eAAe,CAAC,IAAI,EAAE,MAAM,EAAE,EAAE,OAAO,GAAE;QAC7C,QAAQ,CAAC,EAAE,OAAO,CAAC;QACnB,KAAK,CAAC,EAAE,MAAM,CAAC;QACf,MAAM,CAAC,EAAE,MAAM,CAAC;QAChB,MAAM,CAAC,EAAE,KAAK,GAAG,MAAM,CAAC;KACpB,GAAG,OAAO,CAAC,MAAM,EAAE,CAAC;CAY3B"}
|
|
@@ -0,0 +1,85 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
// Module-interop helpers emitted for TypeScript `import()` support. An
// implementation found on `this` wins; otherwise the local fallback is used.

/**
 * Binds `m[k]` onto `o` as `k2` (or as `k` when `k2` is omitted).
 */
var __createBinding = (this && this.__createBinding) || (function () {
    var bindByCopy = function (o, m, k, k2) {
        var name = k2 === undefined ? k : k2;
        o[name] = m[k];
    };
    var bindByDescriptor = function (o, m, k, k2) {
        var name = k2 === undefined ? k : k2;
        var desc = Object.getOwnPropertyDescriptor(m, k);
        var replace = !desc;
        if (!replace) {
            replace = "get" in desc ? !m.__esModule : desc.writable || desc.configurable;
        }
        if (replace) {
            // Live getter: reads always reflect the source module's current value.
            desc = { enumerable: true, get: function () { return m[k]; } };
        }
        Object.defineProperty(o, name, desc);
    };
    return Object.create ? bindByDescriptor : bindByCopy;
})();

/**
 * Attaches `v` to `o` as its enumerable `default` property.
 */
var __setModuleDefault = (this && this.__setModuleDefault) || (function () {
    var defineDefault = function (o, v) {
        Object.defineProperty(o, "default", { enumerable: true, value: v });
    };
    var assignDefault = function (o, v) {
        o["default"] = v;
    };
    return Object.create ? defineDefault : assignDefault;
})();

/**
 * Produces a namespace object for a required module. Modules marked
 * `__esModule` pass through as-is; others have their own properties re-bound
 * onto a new object that also carries the module as `default`.
 */
var __importStar = (this && this.__importStar) || function (mod) {
    if (mod && mod.__esModule) {
        return mod;
    }
    var ns = {};
    if (mod != null) {
        for (var prop in mod) {
            var isOwnNamedExport = prop !== "default" && Object.prototype.hasOwnProperty.call(mod, prop);
            if (isOwnNamedExport) {
                __createBinding(ns, mod, prop);
            }
        }
    }
    __setModuleDefault(ns, mod);
    return ns;
};
|
|
25
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
26
|
+
exports.ScreenshotCapture = void 0;
|
|
27
|
+
const fs_1 = require("fs");
|
|
28
|
+
const path_1 = require("path");
|
|
29
|
+
/**
 * Screenshot capture for web pages. Requires Playwright (optional dependency).
 */
class ScreenshotCapture {
    /** Directory that screenshot files are written to. */
    outputDir;

    /**
     * @param {string} [outputDir='./screenshots'] Target directory; created on
     *   first capture when it does not exist.
     */
    constructor(outputDir = './screenshots') {
        this.outputDir = outputDir;
    }

    /**
     * Opens `url` in headless Chromium and writes a screenshot to `outputDir`.
     * @param {string} url Page to capture.
     * @param {object} [options]
     * @param {boolean} [options.fullPage=true] Capture the whole scrollable page.
     * @param {number} [options.width=1280] Viewport width.
     * @param {number} [options.height=720] Viewport height.
     * @param {'png'|'jpeg'} [options.format='png'] Image type and file extension.
     * @param {number} [options.quality] JPEG quality; only applied for `jpeg`.
     * @returns {Promise<string>} Path of the written file.
     * @throws {Error} When Playwright cannot be loaded (original failure attached
     *   as `cause`), or when navigation or the screenshot fails.
     */
    async capture(url, options = {}) {
        let chromium;
        try {
            const pw = await Promise.resolve().then(() => __importStar(require('playwright')));
            chromium = pw.chromium;
        }
        catch (err) {
            throw new Error('Playwright is required for screenshots but is not installed.\n' +
                'Install it with: npm install playwright && npx playwright install chromium', { cause: err });
        }
        if (!(0, fs_1.existsSync)(this.outputDir)) {
            (0, fs_1.mkdirSync)(this.outputDir, { recursive: true });
        }
        const browser = await chromium.launch({ headless: true });
        // Everything after launch sits inside try/finally so the browser is
        // closed even when creating the context or page fails.
        try {
            const context = await browser.newContext({
                viewport: { width: options.width || 1280, height: options.height || 720 },
            });
            const page = await context.newPage();
            await page.goto(url, { waitUntil: 'networkidle', timeout: 30000 });
            const format = options.format || 'png';
            // File name: the URL with non-alphanumerics replaced, capped at 100 chars.
            const filename = `${url.replace(/[^a-z0-9]/gi, '_').slice(0, 100)}.${format}`;
            const filepath = (0, path_1.join)(this.outputDir, filename);
            // `!= null` rather than truthiness so an explicit quality of 0 is honoured.
            const jpegQuality = format === 'jpeg' && options.quality != null
                ? { quality: options.quality }
                : {};
            await page.screenshot({
                path: filepath,
                fullPage: options.fullPage ?? true,
                type: format,
                ...jpegQuality,
            });
            return filepath;
        }
        finally {
            await browser.close();
        }
    }

    /**
     * Captures each URL in turn via `capture`.
     * @param {string[]} urls Pages to capture.
     * @param {object} [options] Forwarded unchanged to `capture`.
     * @returns {Promise<string[]>} One entry per input URL, in order: the file
     *   path on success, or an empty string when that capture failed.
     */
    async captureMultiple(urls, options = {}) {
        const results = [];
        for (const url of urls) {
            try {
                results.push(await this.capture(url, options));
            }
            catch {
                // Best effort by design: a failed page is reported as '' so the
                // result stays index-aligned with `urls`.
                results.push('');
            }
        }
        return results;
    }
}
|
|
84
|
+
exports.ScreenshotCapture = ScreenshotCapture;
|
|
85
|
+
//# sourceMappingURL=screenshot.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"screenshot.js","sourceRoot":"","sources":["../../src/extractors/screenshot.ts"],"names":[],"mappings":";;;;;;;;;;;;;;;;;;;;;;;;;;AAAA,2BAA2C;AAC3C,+BAA4B;AAE5B;;GAEG;AACH,MAAa,iBAAiB;IACpB,SAAS,CAAS;IAE1B,YAAY,YAAoB,eAAe;QAC7C,IAAI,CAAC,SAAS,GAAG,SAAS,CAAC;IAC7B,CAAC;IAED,KAAK,CAAC,OAAO,CAAC,GAAW,EAAE,UAMvB,EAAE;QACJ,IAAI,QAAa,CAAC;QAClB,IAAI,CAAC;YACH,MAAM,EAAE,GAAG,wDAAa,YAAY,GAAC,CAAC;YACtC,QAAQ,GAAG,EAAE,CAAC,QAAQ,CAAC;QACzB,CAAC;QAAC,MAAM,CAAC;YACP,MAAM,IAAI,KAAK,CACb,gEAAgE;gBAChE,4EAA4E,CAC7E,CAAC;QACJ,CAAC;QAED,IAAI,CAAC,IAAA,eAAU,EAAC,IAAI,CAAC,SAAS,CAAC;YAAE,IAAA,cAAS,EAAC,IAAI,CAAC,SAAS,EAAE,EAAE,SAAS,EAAE,IAAI,EAAE,CAAC,CAAC;QAEhF,MAAM,OAAO,GAAG,MAAM,QAAQ,CAAC,MAAM,CAAC,EAAE,QAAQ,EAAE,IAAI,EAAE,CAAC,CAAC;QAC1D,MAAM,OAAO,GAAG,MAAM,OAAO,CAAC,UAAU,CAAC;YACvC,QAAQ,EAAE,EAAE,KAAK,EAAE,OAAO,CAAC,KAAK,IAAI,IAAI,EAAE,MAAM,EAAE,OAAO,CAAC,MAAM,IAAI,GAAG,EAAE;SAC1E,CAAC,CAAC;QACH,MAAM,IAAI,GAAG,MAAM,OAAO,CAAC,OAAO,EAAE,CAAC;QAErC,IAAI,CAAC;YACH,MAAM,IAAI,CAAC,IAAI,CAAC,GAAG,EAAE,EAAE,SAAS,EAAE,aAAa,EAAE,OAAO,EAAE,KAAK,EAAE,CAAC,CAAC;YAEnE,MAAM,QAAQ,GAAG,GAAG,GAAG,CAAC,OAAO,CAAC,aAAa,EAAE,GAAG,CAAC,CAAC,KAAK,CAAC,CAAC,EAAE,GAAG,CAAC,IAAI,OAAO,CAAC,MAAM,IAAI,KAAK,EAAE,CAAC;YAC/F,MAAM,QAAQ,GAAG,IAAA,WAAI,EAAC,IAAI,CAAC,SAAS,EAAE,QAAQ,CAAC,CAAC;YAEhD,MAAM,IAAI,CAAC,UAAU,CAAC;gBACpB,IAAI,EAAE,QAAQ;gBACd,QAAQ,EAAE,OAAO,CAAC,QAAQ,IAAI,IAAI;gBAClC,IAAI,EAAE,OAAO,CAAC,MAAM,IAAI,KAAK;gBAC7B,GAAG,CAAC,OAAO,CAAC,MAAM,KAAK,MAAM,IAAI,OAAO,CAAC,OAAO,CAAC,CAAC,CAAC,EAAE,OAAO,EAAE,OAAO,CAAC,OAAO,EAAE,CAAC,CAAC,CAAC,EAAE,CAAC;aACtF,CAAC,CAAC;YAEH,OAAO,QAAQ,CAAC;QAClB,CAAC;gBAAS,CAAC;YACT,MAAM,OAAO,CAAC,KAAK,EAAE,CAAC;QACxB,CAAC;IACH,CAAC;IAED,KAAK,CAAC,eAAe,CAAC,IAAc,EAAE,UAKlC,EAAE;QACJ,MAAM,OAAO,GAAa,EAAE,CAAC;QAC7B,KAAK,MAAM,GAAG,IAAI,IAAI,EAAE,CAAC;YACvB,IAAI,CAAC;gBACH,MAAM,IAAI,GAAG,MAAM,IAAI,CAAC,OAAO,CAAC,GAAG,EAAE,OAAO,CAAC,CAAC;gBAC9C,OAAO,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;YACrB,CAAC;YAAC,MAAM,CAAC;gBACP,OAAO,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC;YACnB,CAAC;QACH,C
AAC;QACD,OAAO,OAAO,CAAC;IACjB,CAAC;CACF;AArED,8CAqEC"}
|
package/dist/index.d.ts
ADDED
|
@@ -0,0 +1,70 @@
|
|
|
1
|
+
export * from './core/types';
|
|
2
|
+
export { CrawlPipeline } from './core/pipeline';
|
|
3
|
+
export { CrawlStream } from './core/stream';
|
|
4
|
+
export { BrowserManager } from './browser/manager';
|
|
5
|
+
export { ContentExtractor } from './extractors/content';
|
|
6
|
+
export { PdfExtractor } from './extractors/pdf';
|
|
7
|
+
export { GitHubExtractor } from './extractors/github';
|
|
8
|
+
export { ImageExtractor } from './extractors/images';
|
|
9
|
+
export { ScreenshotCapture } from './extractors/screenshot';
|
|
10
|
+
export { MarkdownTransformer } from './transformers/markdown';
|
|
11
|
+
export { ContentChunker } from './chunking/chunker';
|
|
12
|
+
export { CrawlCache } from './cache/cache';
|
|
13
|
+
export { VectorSearch } from './search/vector';
|
|
14
|
+
export { VectorDBExporter, VectorDBExportOptions, OutputFormatter, OutputTemplate } from './export';
|
|
15
|
+
export { SitemapParser } from './utils/sitemap';
|
|
16
|
+
export { MetricsCollector } from './utils/metrics';
|
|
17
|
+
export { CrawlScheduler } from './utils/scheduler';
|
|
18
|
+
export { Deduplicator } from './utils/dedup';
|
|
19
|
+
export { WebhookNotifier, WebhookConfig } from './utils/webhook';
|
|
20
|
+
export { validateUrl, validateCrawlOptions } from './utils/validation';
|
|
21
|
+
import { WebContextConfig, CrawlOptions, CrawlResult, ContentChunk, SearchResult, MetricsData } from './core/types';
|
|
22
|
+
import { CrawlStream } from './core/stream';
|
|
23
|
+
import { VectorDBExportOptions } from './export';
|
|
24
|
+
import { WebhookConfig } from './utils/webhook';
|
|
25
|
+
/**
 * WebContext - Turn any web content into clean AI-ready context instantly.
 *
 * Declares the package's main class. The members below cover single-page
 * extraction, recursive documentation crawls, GitHub and PDF sources, search,
 * vector-DB export, streaming, webhooks and metrics.
 */
export declare class WebContext {
    private pipeline;
    private vectorSearch;
    private metrics;
    private webhooks;
    private config;
    /**
     * @param config Optional configuration for the instance.
     */
    constructor(config?: WebContextConfig);
    /**
     * Extract content from a single URL.
     * @param url Page to extract.
     * @param options Optional crawl option overrides.
     */
    extract(url: string, options?: Partial<CrawlOptions>): Promise<CrawlResult>;
    /**
     * Crawl a documentation site recursively.
     * @param url Starting URL of the crawl.
     * @param options Optional crawl option overrides.
     */
    crawlDocs(url: string, options?: Partial<CrawlOptions>): Promise<CrawlResult>;
    /**
     * Extract and return only markdown.
     * @param url Page to extract.
     * @param options Optional crawl option overrides.
     */
    toMarkdown(url: string, options?: Partial<CrawlOptions>): Promise<string>;
    /**
     * Extract and return chunked content for RAG.
     * @param url Page to extract.
     * @param options Optional crawl option overrides.
     */
    toChunks(url: string, options?: Partial<CrawlOptions>): Promise<ContentChunk[]>;
    /**
     * Extract and return a context packet optimized for LLM consumption.
     * @param url Page to extract.
     * @param options Optional crawl option overrides, plus an optional
     *   `maxTokens` value (presumably a cap on the size of the returned
     *   context; confirm against the implementation).
     */
    toContext(url: string, options?: Partial<CrawlOptions> & {
        maxTokens?: number;
    }): Promise<string>;
    /**
     * Semantic search within a page's content.
     * @param url Page whose content is searched.
     * @param query Search text.
     * @param topK Optional number of results (presumably the maximum returned;
     *   the default is not visible in this declaration).
     */
    search(url: string, query: string, topK?: number): Promise<SearchResult[]>;
    /**
     * Extract GitHub README.
     * @param repoUrl URL of the repository.
     */
    extractReadme(repoUrl: string): Promise<CrawlResult>;
    /**
     * Extract GitHub repo with docs.
     * @param repoUrl URL of the repository.
     * @param options Optional crawl option overrides.
     */
    extractGitHub(repoUrl: string, options?: Partial<CrawlOptions>): Promise<CrawlResult>;
    /**
     * Extract content from a PDF (URL or local path).
     * @param source PDF URL or local file path.
     */
    extractPdf(source: string): Promise<CrawlResult>;
    /**
     * Extract API reference.
     * @param url Page to extract.
     */
    extractAPI(url: string): Promise<CrawlResult>;
    /**
     * Export chunks in vector DB format.
     * @param url Page to extract.
     * @param options Export options combined with optional crawl option overrides.
     */
    exportForVectorDB(url: string, options?: VectorDBExportOptions & Partial<CrawlOptions>): Promise<string>;
    /**
     * Stream crawl results in real-time. Unlike the other extraction methods,
     * this returns a `CrawlStream` directly rather than a promise.
     * @param url Starting URL.
     * @param options Optional crawl option overrides.
     */
    extractStream(url: string, options?: Partial<CrawlOptions>): CrawlStream;
    /**
     * Register a webhook for crawl notifications.
     * @param config Webhook configuration.
     */
    registerWebhook(config: WebhookConfig): void;
    /**
     * Get collected metrics.
     * @returns The metrics data, or `null` when none is available.
     */
    getMetrics(): MetricsData | null;
    /** Cleanup resources. */
    dispose(): void;
    private formatForLLM;
}
|
|
69
|
+
export default WebContext;
|
|
70
|
+
//# sourceMappingURL=index.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AAAA,cAAc,cAAc,CAAC;AAC7B,OAAO,EAAE,aAAa,EAAE,MAAM,iBAAiB,CAAC;AAChD,OAAO,EAAE,WAAW,EAAE,MAAM,eAAe,CAAC;AAC5C,OAAO,EAAE,cAAc,EAAE,MAAM,mBAAmB,CAAC;AACnD,OAAO,EAAE,gBAAgB,EAAE,MAAM,sBAAsB,CAAC;AACxD,OAAO,EAAE,YAAY,EAAE,MAAM,kBAAkB,CAAC;AAChD,OAAO,EAAE,eAAe,EAAE,MAAM,qBAAqB,CAAC;AACtD,OAAO,EAAE,cAAc,EAAE,MAAM,qBAAqB,CAAC;AACrD,OAAO,EAAE,iBAAiB,EAAE,MAAM,yBAAyB,CAAC;AAC5D,OAAO,EAAE,mBAAmB,EAAE,MAAM,yBAAyB,CAAC;AAC9D,OAAO,EAAE,cAAc,EAAE,MAAM,oBAAoB,CAAC;AACpD,OAAO,EAAE,UAAU,EAAE,MAAM,eAAe,CAAC;AAC3C,OAAO,EAAE,YAAY,EAAE,MAAM,iBAAiB,CAAC;AAC/C,OAAO,EAAE,gBAAgB,EAAE,qBAAqB,EAAE,eAAe,EAAE,cAAc,EAAE,MAAM,UAAU,CAAC;AACpG,OAAO,EAAE,aAAa,EAAE,MAAM,iBAAiB,CAAC;AAChD,OAAO,EAAE,gBAAgB,EAAE,MAAM,iBAAiB,CAAC;AACnD,OAAO,EAAE,cAAc,EAAE,MAAM,mBAAmB,CAAC;AACnD,OAAO,EAAE,YAAY,EAAE,MAAM,eAAe,CAAC;AAC7C,OAAO,EAAE,eAAe,EAAE,aAAa,EAAE,MAAM,iBAAiB,CAAC;AACjE,OAAO,EAAE,WAAW,EAAE,oBAAoB,EAAE,MAAM,oBAAoB,CAAC;AAEvE,OAAO,EACL,gBAAgB,EAAE,YAAY,EAAE,WAAW,EAAE,YAAY,EAC1C,YAAY,EAAE,WAAW,EACzC,MAAM,cAAc,CAAC;AAEtB,OAAO,EAAE,WAAW,EAAE,MAAM,eAAe,CAAC;AAE5C,OAAO,EAAoB,qBAAqB,EAAE,MAAM,UAAU,CAAC;AACnE,OAAO,EAAmB,aAAa,EAAE,MAAM,iBAAiB,CAAC;AAIjE;;GAEG;AACH,qBAAa,UAAU;IACrB,OAAO,CAAC,QAAQ,CAAgB;IAChC,OAAO,CAAC,YAAY,CAAe;IACnC,OAAO,CAAC,OAAO,CAA0B;IACzC,OAAO,CAAC,QAAQ,CAAgC;IAChD,OAAO,CAAC,MAAM,CAAmB;gBAErB,MAAM,GAAE,gBAAqB;IAOzC,wCAAwC;IAClC,OAAO,CAAC,GAAG,EAAE,MAAM,EAAE,OAAO,GAAE,OAAO,CAAC,YAAY,CAAM,GAAG,OAAO,CAAC,WAAW,CAAC;IAUrF,6CAA6C;IACvC,SAAS,CAAC,GAAG,EAAE,MAAM,EAAE,OAAO,GAAE,OAAO,CAAC,YAAY,CAAM,GAAG,OAAO,CAAC,WAAW,CAAC;IAUvF,uCAAuC;IACjC,UAAU,CAAC,GAAG,EAAE,MAAM,EAAE,OAAO,GAAE,OAAO,CAAC,YAAY,CAAM,GAAG,OAAO,CAAC,MAAM,CAAC;IAKnF,iDAAiD;IAC3C,QAAQ,CAAC,GAAG,EAAE,MAAM,EAAE,OAAO,GAAE,OAAO,CAAC,YAAY,CAAM,GAAG,OAAO,CAAC,YAAY,EAAE,CAAC;IAKzF,wEAAwE;IAClE,SAAS,CAAC,GAAG,EAAE,MAAM,EAAE,OAAO,GAAE,OAAO,CAAC,YAAY,CAAC,GAAG;QAAE,SAAS,CAAC,EAAE,MAAM,CAAA;KAAO,GAAG,OAAO,CAAC,MAAM,CAAC;IAM3G,8CAA8C;IACxC,MAAM,CAAC,G
AAG,EAAE,MAAM,EAAE,KAAK,EAAE,MAAM,EAAE,IAAI,GAAE,MAAU,GAAG,OAAO,CAAC,YAAY,EAAE,CAAC;IAMnF,4BAA4B;IACtB,aAAa,CAAC,OAAO,EAAE,MAAM,GAAG,OAAO,CAAC,WAAW,CAAC;IAK1D,oCAAoC;IAC9B,aAAa,CAAC,OAAO,EAAE,MAAM,EAAE,OAAO,GAAE,OAAO,CAAC,YAAY,CAAM,GAAG,OAAO,CAAC,WAAW,CAAC;IAI/F,qDAAqD;IAC/C,UAAU,CAAC,MAAM,EAAE,MAAM,GAAG,OAAO,CAAC,WAAW,CAAC;IAStD,4BAA4B;IACtB,UAAU,CAAC,GAAG,EAAE,MAAM,GAAG,OAAO,CAAC,WAAW,CAAC;IAInD,wCAAwC;IAClC,iBAAiB,CAAC,GAAG,EAAE,MAAM,EAAE,OAAO,GAAE,qBAAqB,GAAG,OAAO,CAAC,YAAY,CAAsB,GAAG,OAAO,CAAC,MAAM,CAAC;IAOlI,wCAAwC;IACxC,aAAa,CAAC,GAAG,EAAE,MAAM,EAAE,OAAO,GAAE,OAAO,CAAC,YAAY,CAAM,GAAG,WAAW;IAe5E,iDAAiD;IACjD,eAAe,CAAC,MAAM,EAAE,aAAa,GAAG,IAAI;IAK5C,4BAA4B;IAC5B,UAAU,IAAI,WAAW,GAAG,IAAI;IAIhC,wBAAwB;IACxB,OAAO,IAAI,IAAI;IAKf,OAAO,CAAC,YAAY;CAiBrB;AAED,eAAe,UAAU,CAAC"}
|
package/dist/index.js
ADDED
|
@@ -0,0 +1,206 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
/**
 * Exposes property `k` of module `m` on object `o` under the name `k2`
 * (`k2` defaults to `k`). An implementation already present on `this` wins.
 */
var __createBinding = (this && this.__createBinding) || (function () {
    if (!Object.create) {
        // No Object.create available: fall back to a plain value copy.
        return function (o, m, k, k2) {
            var exportedAs = k2 === undefined ? k : k2;
            o[exportedAs] = m[k];
        };
    }
    return function (o, m, k, k2) {
        var exportedAs = k2 === undefined ? k : k2;
        var descriptor = Object.getOwnPropertyDescriptor(m, k);
        var needsGetter;
        if (!descriptor) {
            needsGetter = true;
        }
        else if ("get" in descriptor) {
            needsGetter = !m.__esModule;
        }
        else {
            needsGetter = descriptor.writable || descriptor.configurable;
        }
        if (needsGetter) {
            // Forward reads to the source module so later changes stay visible.
            descriptor = { enumerable: true, get: function () { return m[k]; } };
        }
        Object.defineProperty(o, exportedAs, descriptor);
    };
})();

/**
 * Re-exports every enumerable member of `m` on `exports`, skipping `default`
 * and any name `exports` already owns.
 */
var __exportStar = (this && this.__exportStar) || function (m, exports) {
    for (var member in m) {
        if (member === "default") {
            continue;
        }
        if (Object.prototype.hasOwnProperty.call(exports, member)) {
            continue;
        }
        __createBinding(exports, m, member);
    }
};
|
|
16
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
17
|
+
exports.WebContext = exports.validateCrawlOptions = exports.validateUrl = exports.WebhookNotifier = exports.Deduplicator = exports.CrawlScheduler = exports.MetricsCollector = exports.SitemapParser = exports.OutputFormatter = exports.VectorDBExporter = exports.VectorSearch = exports.CrawlCache = exports.ContentChunker = exports.MarkdownTransformer = exports.ScreenshotCapture = exports.ImageExtractor = exports.GitHubExtractor = exports.PdfExtractor = exports.ContentExtractor = exports.BrowserManager = exports.CrawlStream = exports.CrawlPipeline = void 0;
|
|
18
|
+
__exportStar(require("./core/types"), exports);
|
|
19
|
+
var pipeline_1 = require("./core/pipeline");
|
|
20
|
+
Object.defineProperty(exports, "CrawlPipeline", { enumerable: true, get: function () { return pipeline_1.CrawlPipeline; } });
|
|
21
|
+
var stream_1 = require("./core/stream");
|
|
22
|
+
Object.defineProperty(exports, "CrawlStream", { enumerable: true, get: function () { return stream_1.CrawlStream; } });
|
|
23
|
+
var manager_1 = require("./browser/manager");
|
|
24
|
+
Object.defineProperty(exports, "BrowserManager", { enumerable: true, get: function () { return manager_1.BrowserManager; } });
|
|
25
|
+
var content_1 = require("./extractors/content");
|
|
26
|
+
Object.defineProperty(exports, "ContentExtractor", { enumerable: true, get: function () { return content_1.ContentExtractor; } });
|
|
27
|
+
var pdf_1 = require("./extractors/pdf");
|
|
28
|
+
Object.defineProperty(exports, "PdfExtractor", { enumerable: true, get: function () { return pdf_1.PdfExtractor; } });
|
|
29
|
+
var github_1 = require("./extractors/github");
|
|
30
|
+
Object.defineProperty(exports, "GitHubExtractor", { enumerable: true, get: function () { return github_1.GitHubExtractor; } });
|
|
31
|
+
var images_1 = require("./extractors/images");
|
|
32
|
+
Object.defineProperty(exports, "ImageExtractor", { enumerable: true, get: function () { return images_1.ImageExtractor; } });
|
|
33
|
+
var screenshot_1 = require("./extractors/screenshot");
|
|
34
|
+
Object.defineProperty(exports, "ScreenshotCapture", { enumerable: true, get: function () { return screenshot_1.ScreenshotCapture; } });
|
|
35
|
+
var markdown_1 = require("./transformers/markdown");
|
|
36
|
+
Object.defineProperty(exports, "MarkdownTransformer", { enumerable: true, get: function () { return markdown_1.MarkdownTransformer; } });
|
|
37
|
+
var chunker_1 = require("./chunking/chunker");
|
|
38
|
+
Object.defineProperty(exports, "ContentChunker", { enumerable: true, get: function () { return chunker_1.ContentChunker; } });
|
|
39
|
+
var cache_1 = require("./cache/cache");
|
|
40
|
+
Object.defineProperty(exports, "CrawlCache", { enumerable: true, get: function () { return cache_1.CrawlCache; } });
|
|
41
|
+
var vector_1 = require("./search/vector");
|
|
42
|
+
Object.defineProperty(exports, "VectorSearch", { enumerable: true, get: function () { return vector_1.VectorSearch; } });
|
|
43
|
+
var export_1 = require("./export");
|
|
44
|
+
Object.defineProperty(exports, "VectorDBExporter", { enumerable: true, get: function () { return export_1.VectorDBExporter; } });
|
|
45
|
+
Object.defineProperty(exports, "OutputFormatter", { enumerable: true, get: function () { return export_1.OutputFormatter; } });
|
|
46
|
+
var sitemap_1 = require("./utils/sitemap");
|
|
47
|
+
Object.defineProperty(exports, "SitemapParser", { enumerable: true, get: function () { return sitemap_1.SitemapParser; } });
|
|
48
|
+
var metrics_1 = require("./utils/metrics");
|
|
49
|
+
Object.defineProperty(exports, "MetricsCollector", { enumerable: true, get: function () { return metrics_1.MetricsCollector; } });
|
|
50
|
+
var scheduler_1 = require("./utils/scheduler");
|
|
51
|
+
Object.defineProperty(exports, "CrawlScheduler", { enumerable: true, get: function () { return scheduler_1.CrawlScheduler; } });
|
|
52
|
+
var dedup_1 = require("./utils/dedup");
|
|
53
|
+
Object.defineProperty(exports, "Deduplicator", { enumerable: true, get: function () { return dedup_1.Deduplicator; } });
|
|
54
|
+
var webhook_1 = require("./utils/webhook");
|
|
55
|
+
Object.defineProperty(exports, "WebhookNotifier", { enumerable: true, get: function () { return webhook_1.WebhookNotifier; } });
|
|
56
|
+
var validation_1 = require("./utils/validation");
|
|
57
|
+
Object.defineProperty(exports, "validateUrl", { enumerable: true, get: function () { return validation_1.validateUrl; } });
|
|
58
|
+
Object.defineProperty(exports, "validateCrawlOptions", { enumerable: true, get: function () { return validation_1.validateCrawlOptions; } });
|
|
59
|
+
const pipeline_2 = require("./core/pipeline");
|
|
60
|
+
const stream_2 = require("./core/stream");
|
|
61
|
+
const vector_2 = require("./search/vector");
|
|
62
|
+
const export_2 = require("./export");
|
|
63
|
+
const webhook_2 = require("./utils/webhook");
|
|
64
|
+
const metrics_2 = require("./utils/metrics");
|
|
65
|
+
const validation_2 = require("./utils/validation");
|
|
66
|
+
/**
 * WebContext - Turn any web content into clean AI-ready context instantly.
 *
 * Facade that wires a CrawlPipeline, a VectorSearch index and an optional
 * MetricsCollector together and exposes convenience extraction methods.
 */
class WebContext {
    pipeline;
    vectorSearch;
    metrics;
    webhooks = null;
    config;
    /**
     * @param {object} [config] Forwarded unchanged to CrawlPipeline. A truthy
     *   `config.metrics` additionally enables metrics collection.
     */
    constructor(config = {}) {
        this.config = config;
        this.pipeline = new pipeline_2.CrawlPipeline(config);
        this.vectorSearch = new vector_2.VectorSearch();
        this.metrics = config.metrics ? new metrics_2.MetricsCollector() : null;
    }
    /**
     * Extract content from a single URL.
     *
     * @param {string} url Validated with validateUrl before crawling.
     * @param {object} [options] Extra crawl options; `depth` defaults to 0.
     * @returns {Promise<object>} The crawl result produced by the pipeline.
     */
    async extract(url, options = {}) {
        (0, validation_2.validateUrl)(url);
        // `url` is written after the spread so the validated value always wins,
        // and `??` keeps the default when `depth` is explicitly undefined.
        return this.runCrawl({ ...options, url, depth: options?.depth ?? 0 });
    }
    /**
     * Crawl a documentation site recursively.
     *
     * @param {string} url Validated with validateUrl before crawling.
     * @param {object} [options] Extra crawl options; `depth` defaults to 3.
     * @returns {Promise<object>} The crawl result produced by the pipeline.
     */
    async crawlDocs(url, options = {}) {
        (0, validation_2.validateUrl)(url);
        return this.runCrawl({ ...options, url, depth: options?.depth ?? 3 });
    }
    /** Extract and return only markdown, pages separated by a `---` rule. */
    async toMarkdown(url, options = {}) {
        const result = await this.extract(url, options);
        return result.pages.map((page) => page.markdown).join('\n\n---\n\n');
    }
    /** Extract and return chunked content for RAG. */
    async toChunks(url, options = {}) {
        const result = await this.extract(url, options);
        return result.context.chunks;
    }
    /**
     * Extract and return a context packet optimized for LLM consumption.
     *
     * @param {string} url
     * @param {object} [options] Crawl options plus an optional `maxTokens`
     *   budget, which is consumed here and not forwarded to the crawl.
     * @returns {Promise<string>} Text produced by formatForLLM.
     */
    async toContext(url, options = {}) {
        const { maxTokens, ...crawlOptions } = options;
        const result = await this.extract(url, crawlOptions);
        return this.formatForLLM(result.context, maxTokens);
    }
    /** Semantic search within a page's content. */
    async search(url, query, topK = 5) {
        const chunks = await this.toChunks(url);
        // NOTE(review): the index is shared by every search() call on this
        // instance and is only cleared in dispose(); confirm whether
        // VectorSearch.index() replaces or appends to earlier chunks.
        this.vectorSearch.index(chunks);
        return this.vectorSearch.search(query, topK);
    }
    /** Extract GitHub README (a single trailing slash is stripped from the URL). */
    async extractReadme(repoUrl) {
        (0, validation_2.validateUrl)(repoUrl);
        return this.extract(repoUrl.replace(/\/$/, ''));
    }
    /** Extract GitHub repo with docs; crawl depth defaults to 1. */
    async extractGitHub(repoUrl, options = {}) {
        return this.crawlDocs(repoUrl, { depth: 1, ...options });
    }
    /**
     * Extract content from a PDF (URL or local path).
     *
     * Only sources with an explicit `http://` or `https://` scheme (any case)
     * are treated as URLs. A plain prefix check on "http" would misroute local
     * paths such as `httpdocs/manual.pdf` into URL validation.
     *
     * @param {string} source Remote URL or local file path.
     * @returns {Promise<object>} The crawl result produced by the pipeline.
     */
    async extractPdf(source) {
        if (/^https?:\/\//i.test(source)) {
            return this.extract(source);
        }
        // Local file: bypass URL validation and call the pipeline directly.
        // NOTE(review): this path does not record metrics; confirm that is intended.
        return this.pipeline.crawl({ url: source, depth: 0 });
    }
    /** Extract API reference (runs extract with `focusMode: 'api'`). */
    async extractAPI(url) {
        return this.extract(url, { focusMode: 'api' });
    }
    /**
     * Export chunks in vector DB format.
     *
     * @param {string} url
     * @param {object} [options] Export settings (`format`, `namespace`,
     *   `collection`, `includeMetadata`); every other key is used as a crawl
     *   option. `format` falls back to 'json' even when an options object
     *   without it is supplied.
     * @returns {Promise<*>} Whatever VectorDBExporter.exportChunks returns.
     */
    async exportForVectorDB(url, options = { format: 'json' }) {
        const { format = 'json', namespace, collection, includeMetadata, ...crawlOptions } = options;
        const result = await this.extract(url, crawlOptions);
        const exporter = new export_2.VectorDBExporter();
        return exporter.exportChunks(result.context.chunks, { format, namespace, collection, includeMetadata });
    }
    /**
     * Stream crawl results in real-time.
     *
     * Throws synchronously when the URL is invalid. Otherwise the stream is
     * returned immediately and receives progress, page, chunk and done events,
     * or a single error event if the crawl rejects.
     *
     * @param {string} url
     * @param {object} [options] Crawl options; `onProgress` is always replaced
     *   by the stream's own progress emitter.
     * @returns {object} The CrawlStream instance.
     */
    extractStream(url, options = {}) {
        (0, validation_2.validateUrl)(url);
        const stream = new stream_2.CrawlStream();
        // NOTE(review): streamed crawls do not record metrics; confirm that is intended.
        this.pipeline
            .crawl({
                ...options,
                url,
                depth: options?.depth ?? 0,
                onProgress: (progress) => stream.emitProgress(progress),
            })
            .then((result) => {
                for (const page of result.pages) {
                    stream.emitPage(page);
                }
                stream.emitChunks(result.context.chunks);
                stream.emitDone(result);
            })
            .catch((err) => {
                // Rejections are not guaranteed to be Error instances.
                const error = err instanceof Error ? err.message : String(err);
                stream.emitError({ url, error });
            });
        return stream;
    }
    /**
     * Register a webhook for crawl notifications.
     *
     * The notifier is created lazily on first registration.
     * NOTE(review): nothing in this class invokes the notifier afterwards;
     * confirm where notifications are expected to be sent from.
     */
    registerWebhook(config) {
        if (!this.webhooks) {
            this.webhooks = new webhook_2.WebhookNotifier();
        }
        this.webhooks.register(config);
    }
    /** Get collected metrics, or null when metrics collection is disabled. */
    getMetrics() {
        return this.metrics?.getMetrics() ?? null;
    }
    /** Cleanup resources: clears the vector index and disposes the pipeline. */
    dispose() {
        this.vectorSearch.clear();
        this.pipeline.dispose();
    }
    /**
     * Internal helper: run one crawl through the pipeline and, when metrics
     * are enabled, record pages processed, total tokens and elapsed time.
     *
     * @param {object} crawlOptions Passed to pipeline.crawl as-is.
     * @returns {Promise<object>} The crawl result.
     */
    async runCrawl(crawlOptions) {
        const startedAt = Date.now();
        const result = await this.pipeline.crawl(crawlOptions);
        if (this.metrics) {
            this.metrics.recordCrawl(result.stats.pagesProcessed, result.stats.totalTokens, Date.now() - startedAt);
        }
        return result;
    }
    /**
     * Render a context packet as markdown-like text within a token budget.
     *
     * The header is costed at roughly four characters per token; chunks are
     * appended in order until the next chunk would exceed the budget, at which
     * point rendering stops (later, smaller chunks are not considered).
     * Heading lines are not counted against the budget.
     *
     * @param {object} packet Context packet with `source`, optional `summary`,
     *   `totalTokens`, `metadata` and `chunks`.
     * @param {number} [maxTokens] Token budget; defaults to 8000.
     * @returns {string} The trimmed text.
     */
    formatForLLM(packet, maxTokens) {
        const budget = maxTokens ?? 8000;
        let output = `# ${packet.source}\n\n`;
        if (packet.summary) {
            output += `> ${packet.summary}\n\n`;
        }
        output += `> ${packet.metadata.pageCount} pages | ${packet.totalTokens} tokens | ${packet.metadata.contentType}\n\n`;
        let usedTokens = Math.ceil(output.length / 4);
        for (const chunk of packet.chunks) {
            if (usedTokens + chunk.tokens > budget) {
                break;
            }
            if (chunk.metadata.headingPath.length) {
                output += `## ${chunk.metadata.headingPath.join(' > ')}\n\n`;
            }
            output += `${chunk.content}\n\n`;
            usedTokens += chunk.tokens;
        }
        return output.trim();
    }
}
|
|
204
|
+
exports.WebContext = WebContext;
|
|
205
|
+
exports.default = WebContext;
|
|
206
|
+
//# sourceMappingURL=index.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"index.js","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":";;;;;;;;;;;;;;;;;AAAA,+CAA6B;AAC7B,4CAAgD;AAAvC,yGAAA,aAAa,OAAA;AACtB,wCAA4C;AAAnC,qGAAA,WAAW,OAAA;AACpB,6CAAmD;AAA1C,yGAAA,cAAc,OAAA;AACvB,gDAAwD;AAA/C,2GAAA,gBAAgB,OAAA;AACzB,wCAAgD;AAAvC,mGAAA,YAAY,OAAA;AACrB,8CAAsD;AAA7C,yGAAA,eAAe,OAAA;AACxB,8CAAqD;AAA5C,wGAAA,cAAc,OAAA;AACvB,sDAA4D;AAAnD,+GAAA,iBAAiB,OAAA;AAC1B,oDAA8D;AAArD,+GAAA,mBAAmB,OAAA;AAC5B,8CAAoD;AAA3C,yGAAA,cAAc,OAAA;AACvB,uCAA2C;AAAlC,mGAAA,UAAU,OAAA;AACnB,0CAA+C;AAAtC,sGAAA,YAAY,OAAA;AACrB,mCAAoG;AAA3F,0GAAA,gBAAgB,OAAA;AAAyB,yGAAA,eAAe,OAAA;AACjE,2CAAgD;AAAvC,wGAAA,aAAa,OAAA;AACtB,2CAAmD;AAA1C,2GAAA,gBAAgB,OAAA;AACzB,+CAAmD;AAA1C,2GAAA,cAAc,OAAA;AACvB,uCAA6C;AAApC,qGAAA,YAAY,OAAA;AACrB,2CAAiE;AAAxD,0GAAA,eAAe,OAAA;AACxB,iDAAuE;AAA9D,yGAAA,WAAW,OAAA;AAAE,kHAAA,oBAAoB,OAAA;AAM1C,8CAAgD;AAChD,0CAA4C;AAC5C,4CAA+C;AAC/C,qCAAmE;AACnE,6CAAiE;AACjE,6CAAmD;AACnD,mDAAiD;AAEjD;;GAEG;AACH,MAAa,UAAU;IACb,QAAQ,CAAgB;IACxB,YAAY,CAAe;IAC3B,OAAO,CAA0B;IACjC,QAAQ,GAA2B,IAAI,CAAC;IACxC,MAAM,CAAmB;IAEjC,YAAY,SAA2B,EAAE;QACvC,IAAI,CAAC,MAAM,GAAG,MAAM,CAAC;QACrB,IAAI,CAAC,QAAQ,GAAG,IAAI,wBAAa,CAAC,MAAM,CAAC,CAAC;QAC1C,IAAI,CAAC,YAAY,GAAG,IAAI,qBAAY,EAAE,CAAC;QACvC,IAAI,CAAC,OAAO,GAAG,MAAM,CAAC,OAAO,CAAC,CAAC,CAAC,IAAI,0BAAgB,EAAE,CAAC,CAAC,CAAC,IAAI,CAAC;IAChE,CAAC;IAED,wCAAwC;IACxC,KAAK,CAAC,OAAO,CAAC,GAAW,EAAE,UAAiC,EAAE;QAC5D,IAAA,wBAAW,EAAC,GAAG,CAAC,CAAC;QACjB,MAAM,KAAK,GAAG,IAAI,CAAC,GAAG,EAAE,CAAC;QACzB,MAAM,MAAM,GAAG,MAAM,IAAI,CAAC,QAAQ,CAAC,KAAK,CAAC,EAAE,GAAG,EAAE,KAAK,EAAE,CAAC,EAAE,GAAG,OAAO,EAAE,CAAC,CAAC;QACxE,IAAI,IAAI,CAAC,OAAO,EAAE,CAAC;YACjB,IAAI,CAAC,OAAO,CAAC,WAAW,CAAC,MAAM,CAAC,KAAK,CAAC,cAAc,EAAE,MAAM,CAAC,KAAK,CAAC,WAAW,EAAE,IAAI,CAAC,GAAG,EAAE,GAAG,KAAK,CAAC,CAAC;QACtG,CAAC;QACD,OAAO,MAAM,CAAC;IAChB,CAAC;IAED,6CAA6C;IAC7C,KAAK,CAAC,SAAS,CAAC,GAAW,EAAE,UAAiC,EAAE;QAC9D,IAAA,wBAAW,EAAC,GAAG,CAAC,CAAC;QACjB,MAAM,KAAK,GAAG,IAAI,CAAC,GAAG,EAAE,CAAC;QACzB,MAAM,MAAM,GAAG,MAAM,IAAI,CAAC,QAAQ,CAAC,KAAK,CAAC
,EAAE,GAAG,EAAE,KAAK,EAAE,OAAO,CAAC,KAAK,IAAI,CAAC,EAAE,GAAG,OAAO,EAAE,CAAC,CAAC;QACzF,IAAI,IAAI,CAAC,OAAO,EAAE,CAAC;YACjB,IAAI,CAAC,OAAO,CAAC,WAAW,CAAC,MAAM,CAAC,KAAK,CAAC,cAAc,EAAE,MAAM,CAAC,KAAK,CAAC,WAAW,EAAE,IAAI,CAAC,GAAG,EAAE,GAAG,KAAK,CAAC,CAAC;QACtG,CAAC;QACD,OAAO,MAAM,CAAC;IAChB,CAAC;IAED,uCAAuC;IACvC,KAAK,CAAC,UAAU,CAAC,GAAW,EAAE,UAAiC,EAAE;QAC/D,MAAM,MAAM,GAAG,MAAM,IAAI,CAAC,OAAO,CAAC,GAAG,EAAE,OAAO,CAAC,CAAC;QAChD,OAAO,MAAM,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,QAAQ,CAAC,CAAC,IAAI,CAAC,aAAa,CAAC,CAAC;IAC/D,CAAC;IAED,iDAAiD;IACjD,KAAK,CAAC,QAAQ,CAAC,GAAW,EAAE,UAAiC,EAAE;QAC7D,MAAM,MAAM,GAAG,MAAM,IAAI,CAAC,OAAO,CAAC,GAAG,EAAE,OAAO,CAAC,CAAC;QAChD,OAAO,MAAM,CAAC,OAAO,CAAC,MAAM,CAAC;IAC/B,CAAC;IAED,wEAAwE;IACxE,KAAK,CAAC,SAAS,CAAC,GAAW,EAAE,UAA0D,EAAE;QACvF,MAAM,EAAE,SAAS,EAAE,GAAG,YAAY,EAAE,GAAG,OAAO,CAAC;QAC/C,MAAM,MAAM,GAAG,MAAM,IAAI,CAAC,OAAO,CAAC,GAAG,EAAE,YAAY,CAAC,CAAC;QACrD,OAAO,IAAI,CAAC,YAAY,CAAC,MAAM,CAAC,OAAO,EAAE,SAAS,CAAC,CAAC;IACtD,CAAC;IAED,8CAA8C;IAC9C,KAAK,CAAC,MAAM,CAAC,GAAW,EAAE,KAAa,EAAE,OAAe,CAAC;QACvD,MAAM,MAAM,GAAG,MAAM,IAAI,CAAC,QAAQ,CAAC,GAAG,CAAC,CAAC;QACxC,IAAI,CAAC,YAAY,CAAC,KAAK,CAAC,MAAM,CAAC,CAAC;QAChC,OAAO,IAAI,CAAC,YAAY,CAAC,MAAM,CAAC,KAAK,EAAE,IAAI,CAAC,CAAC;IAC/C,CAAC;IAED,4BAA4B;IAC5B,KAAK,CAAC,aAAa,CAAC,OAAe;QACjC,IAAA,wBAAW,EAAC,OAAO,CAAC,CAAC;QACrB,OAAO,IAAI,CAAC,OAAO,CAAC,OAAO,CAAC,OAAO,CAAC,KAAK,EAAE,EAAE,CAAC,CAAC,CAAC;IAClD,CAAC;IAED,oCAAoC;IACpC,KAAK,CAAC,aAAa,CAAC,OAAe,EAAE,UAAiC,EAAE;QACtE,OAAO,IAAI,CAAC,SAAS,CAAC,OAAO,EAAE,EAAE,KAAK,EAAE,CAAC,EAAE,GAAG,OAAO,EAAE,CAAC,CAAC;IAC3D,CAAC;IAED,qDAAqD;IACrD,KAAK,CAAC,UAAU,CAAC,MAAc;QAC7B,IAAI,MAAM,CAAC,UAAU,CAAC,MAAM,CAAC,EAAE,CAAC;YAC9B,OAAO,IAAI,CAAC,OAAO,CAAC,MAAM,CAAC,CAAC;QAC9B,CAAC;QACD,6DAA6D;QAC7D,MAAM,MAAM,GAAG,MAAM,IAAI,CAAC,QAAQ,CAAC,KAAK,CAAC,EAAE,GAAG,EAAE,MAAM,EAAE,KAAK,EAAE,CAAC,EAAE,CAAC,CAAC;QACpE,OAAO,MAAM,CAAC;IAChB,CAAC;IAED,4BAA4B;IAC5B,KAAK,CAAC,UAAU,CAAC,GAAW;QAC1B,OAAO,IAAI,CAAC,OAAO,CAAC,GAAG,EAAE,EAAE,SAAS,EAAE,KAAK,EAAE,CAA
C,CAAC;IACjD,CAAC;IAED,wCAAwC;IACxC,KAAK,CAAC,iBAAiB,CAAC,GAAW,EAAE,UAAyD,EAAE,MAAM,EAAE,MAAM,EAAE;QAC9G,MAAM,EAAE,MAAM,EAAE,SAAS,EAAE,UAAU,EAAE,eAAe,EAAE,GAAG,YAAY,EAAE,GAAG,OAAO,CAAC;QACpF,MAAM,MAAM,GAAG,MAAM,IAAI,CAAC,OAAO,CAAC,GAAG,EAAE,YAAY,CAAC,CAAC;QACrD,MAAM,QAAQ,GAAG,IAAI,yBAAgB,EAAE,CAAC;QACxC,OAAO,QAAQ,CAAC,YAAY,CAAC,MAAM,CAAC,OAAO,CAAC,MAAM,EAAE,EAAE,MAAM,EAAE,SAAS,EAAE,UAAU,EAAE,eAAe,EAAE,CAAC,CAAC;IAC1G,CAAC;IAED,wCAAwC;IACxC,aAAa,CAAC,GAAW,EAAE,UAAiC,EAAE;QAC5D,MAAM,MAAM,GAAG,IAAI,oBAAW,EAAE,CAAC;QACjC,IAAA,wBAAW,EAAC,GAAG,CAAC,CAAC;QACjB,kCAAkC;QAClC,IAAI,CAAC,QAAQ,CAAC,KAAK,CAAC,EAAE,GAAG,EAAE,KAAK,EAAE,CAAC,EAAE,GAAG,OAAO,EAAE,UAAU,EAAE,CAAC,CAAC,EAAE,EAAE,CAAC,MAAM,CAAC,YAAY,CAAC,CAAC,CAAC,EAAE,CAAC;aAC1F,IAAI,CAAC,CAAC,MAAM,EAAE,EAAE;YACf,KAAK,MAAM,IAAI,IAAI,MAAM,CAAC,KAAK;gBAAE,MAAM,CAAC,QAAQ,CAAC,IAAI,CAAC,CAAC;YACvD,MAAM,CAAC,UAAU,CAAC,MAAM,CAAC,OAAO,CAAC,MAAM,CAAC,CAAC;YACzC,IAAI,MAAM,CAAC,KAAK,EAAE,MAAM;gBAAE,MAAM,CAAC,QAAQ,CAAC,MAAM,CAAC,CAAC;;gBAC7C,MAAM,CAAC,QAAQ,CAAC,MAAM,CAAC,CAAC;QAC/B,CAAC,CAAC;aACD,KAAK,CAAC,CAAC,GAAG,EAAE,EAAE,CAAC,MAAM,CAAC,SAAS,CAAC,EAAE,GAAG,EAAE,KAAK,EAAE,GAAG,CAAC,OAAO,EAAE,CAAC,CAAC,CAAC;QACjE,OAAO,MAAM,CAAC;IAChB,CAAC;IAED,iDAAiD;IACjD,eAAe,CAAC,MAAqB;QACnC,IAAI,CAAC,IAAI,CAAC,QAAQ;YAAE,IAAI,CAAC,QAAQ,GAAG,IAAI,yBAAe,EAAE,CAAC;QAC1D,IAAI,CAAC,QAAQ,CAAC,QAAQ,CAAC,MAAM,CAAC,CAAC;IACjC,CAAC;IAED,4BAA4B;IAC5B,UAAU;QACR,OAAO,IAAI,CAAC,OAAO,EAAE,UAAU,EAAE,IAAI,IAAI,CAAC;IAC5C,CAAC;IAED,wBAAwB;IACxB,OAAO;QACL,IAAI,CAAC,YAAY,CAAC,KAAK,EAAE,CAAC;QAC1B,IAAI,CAAC,QAAQ,CAAC,OAAO,EAAE,CAAC;IAC1B,CAAC;IAEO,YAAY,CAAC,MAAqB,EAAE,SAAkB;QAC5D,MAAM,MAAM,GAAG,SAAS,IAAI,IAAI,CAAC;QACjC,IAAI,MAAM,GAAG,KAAK,MAAM,CAAC,MAAM,MAAM,CAAC;QACtC,IAAI,MAAM,CAAC,OAAO;YAAE,MAAM,IAAI,KAAK,MAAM,CAAC,OAAO,MAAM,CAAC;QACxD,MAAM,IAAI,KAAK,MAAM,CAAC,QAAQ,CAAC,SAAS,YAAY,MAAM,CAAC,WAAW,aAAa,MAAM,CAAC,QAAQ,CAAC,WAAW,MAAM,CAAC;QAErH,IAAI,UAAU,GAAG,IAAI,CAAC,IAAI,CAAC,MAAM,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC;QAC9C,KAAK,MAAM,KAAK,IAAI,MAAM,CAAC,MAAM,EAAE,CAAC;Y
AClC,IAAI,UAAU,GAAG,KAAK,CAAC,MAAM,GAAG,MAAM;gBAAE,MAAM;YAC9C,IAAI,KAAK,CAAC,QAAQ,CAAC,WAAW,CAAC,MAAM,EAAE,CAAC;gBACtC,MAAM,IAAI,MAAM,KAAK,CAAC,QAAQ,CAAC,WAAW,CAAC,IAAI,CAAC,KAAK,CAAC,MAAM,CAAC;YAC/D,CAAC;YACD,MAAM,IAAI,KAAK,CAAC,OAAO,GAAG,MAAM,CAAC;YACjC,UAAU,IAAI,KAAK,CAAC,MAAM,CAAC;QAC7B,CAAC;QACD,OAAO,MAAM,CAAC,IAAI,EAAE,CAAC;IACvB,CAAC;CACF;AAlJD,gCAkJC;AAED,kBAAe,UAAU,CAAC"}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"mcp-server.d.ts","sourceRoot":"","sources":["../src/mcp-server.ts"],"names":[],"mappings":""}
|
|
@@ -0,0 +1,108 @@
|
|
|
1
|
+
#!/usr/bin/env node
|
|
2
|
+
"use strict";
|
|
3
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
4
|
+
/**
|
|
5
|
+
* WebContext MCP Server for Amazon Q Developer / Claude Desktop / any MCP client.
|
|
6
|
+
*
|
|
7
|
+
* Usage:
|
|
8
|
+
* node dist/mcp-server.js
|
|
9
|
+
*
|
|
10
|
+
* This runs as a stdio MCP server that exposes WebContext tools to AI agents.
|
|
11
|
+
*/
|
|
12
|
+
const mcp_1 = require("./sdk/mcp");
|
|
13
|
+
// Settings handed to createMCPTools: cache, retry and rate-limit sections.
// NOTE(review): units of `ttl` and `backoffMs` are not visible here; confirm
// against the cache and retry implementations.
const toolSettings = {
    cache: {
        enabled: true,
        ttl: 3600,
        maxSize: 500,
        contentHashing: true,
    },
    retry: {
        maxRetries: 3,
        backoffMs: 1000,
        backoffMultiplier: 2,
        retryOn: [429, 500, 502, 503, 504],
    },
    rateLimit: {
        requestsPerSecond: 2,
        burstSize: 5,
    },
};
// Tool definitions (name, description, inputSchema, handler) served by this process.
const tools = (0, mcp_1.createMCPTools)(toolSettings);
|
|
18
|
+
// MCP stdio protocol implementation
// Public view of a tool: everything except its handler.
const describeTool = ({ name, description, inputSchema }) => ({ name, description, inputSchema });
// Static server identity plus the tool descriptors returned by `tools/list`.
const server = {
    name: 'webcontext',
    version: '1.0.0',
    tools: tools.map((tool) => describeTool(tool)),
};
|
|
28
|
+
/**
 * Write a JSON-RPC 2.0 success message to stdout, preceded by a
 * Content-Length header that carries the body's byte length.
 *
 * @param {*} id Request id echoed back to the client.
 * @param {*} result Value placed in the `result` member.
 */
function sendResponse(id, result) {
    const payload = JSON.stringify({ jsonrpc: '2.0', id, result });
    const header = `Content-Length: ${Buffer.byteLength(payload)}\r\n\r\n`;
    process.stdout.write(`${header}${payload}`);
}
|
|
32
|
+
/**
 * Write a JSON-RPC 2.0 error message to stdout, preceded by a
 * Content-Length header that carries the body's byte length.
 *
 * @param {*} id Request id, or null when the request could not be identified.
 * @param {number} code JSON-RPC error code.
 * @param {string} message Human-readable error description.
 */
function sendError(id, code, message) {
    const error = { code, message };
    const payload = JSON.stringify({ jsonrpc: '2.0', id, error });
    const header = `Content-Length: ${Buffer.byteLength(payload)}\r\n\r\n`;
    process.stdout.write(`${header}${payload}`);
}
|
|
36
|
+
/**
 * Dispatch one decoded JSON-RPC message.
 *
 * Handles `initialize`, `tools/list`, `tools/call` and the
 * `notifications/initialized` notification. Any other method gets a
 * "Method not found" error when the message carries an id; messages without
 * an id are treated as notifications and ignored.
 *
 * @param {object} request Parsed JSON-RPC message (`id`, `method`, `params`).
 * @returns {Promise<void>} Resolves once the reply, if any, has been written.
 */
async function handleRequest(request) {
    // JSON such as `null` or `42` parses fine but cannot be destructured.
    if (request === null || typeof request !== 'object') {
        sendError(null, -32600, 'Invalid Request');
        return;
    }
    const { id, method, params } = request;
    switch (method) {
        case 'initialize':
            sendResponse(id, {
                protocolVersion: '2024-11-05',
                capabilities: { tools: {} },
                serverInfo: { name: server.name, version: server.version },
            });
            break;
        case 'tools/list':
            sendResponse(id, { tools: server.tools });
            break;
        case 'tools/call': {
            // `params` may be absent; treat that as an unknown tool rather than
            // throwing and leaving the client without a reply.
            const toolName = params?.name;
            const tool = tools.find((t) => t.name === toolName);
            if (!tool) {
                sendError(id, -32602, `Unknown tool: ${toolName}`);
                return;
            }
            try {
                const result = await tool.handler(params.arguments || {});
                sendResponse(id, {
                    content: [{ type: 'text', text: JSON.stringify(result, null, 2) }],
                });
            }
            catch (err) {
                // Tool failures are reported as a tool result flagged with
                // isError, not as a protocol-level error.
                const reason = err instanceof Error ? err.message : String(err);
                sendResponse(id, {
                    content: [{ type: 'text', text: `Error: ${reason}` }],
                    isError: true,
                });
            }
            break;
        }
        case 'notifications/initialized':
            // No response needed for notifications
            break;
        default:
            // Compare against null/undefined explicitly: 0 is a legal request id.
            if (id !== undefined && id !== null) {
                sendError(id, -32601, `Method not found: ${method}`);
            }
    }
}
|
|
77
|
+
// Read stdio input (Content-Length header framing)
//
// Content-Length counts BYTES, so incoming data is accumulated as a Buffer
// and only decoded once a complete body is available. Measuring a decoded
// string instead would misplace frame boundaries whenever a body contains
// multi-byte UTF-8 characters.
// NOTE(review): the MCP stdio transport is specified as newline-delimited
// JSON; confirm the intended clients really use Content-Length framing.
const HEADER_SEPARATOR = Buffer.from('\r\n\r\n');
let buffer = Buffer.alloc(0);
/**
 * Run handleRequest for one parsed message and make sure a failure inside it
 * is reported to the client instead of surfacing as an unhandled rejection.
 */
function dispatchRequest(request) {
    const reportFailure = (err) => {
        const reason = err instanceof Error ? err.message : String(err);
        sendError(request?.id ?? null, -32603, `Internal error: ${reason}`);
    };
    try {
        Promise.resolve(handleRequest(request)).catch(reportFailure);
    }
    catch (err) {
        reportFailure(err);
    }
}
process.stdin.on('data', (chunk) => {
    // Chunks are Buffers unless an encoding was set on stdin elsewhere.
    const bytes = typeof chunk === 'string' ? Buffer.from(chunk, 'utf-8') : chunk;
    buffer = Buffer.concat([buffer, bytes]);
    while (true) {
        const headerEnd = buffer.indexOf(HEADER_SEPARATOR);
        if (headerEnd === -1) {
            break;
        }
        const bodyStart = headerEnd + HEADER_SEPARATOR.length;
        const header = buffer.toString('latin1', 0, headerEnd);
        const match = header.match(/Content-Length:\s*(\d+)/i);
        if (!match) {
            // Header block without a length: drop it and look for the next one.
            buffer = buffer.subarray(bodyStart);
            continue;
        }
        const contentLength = Number.parseInt(match[1], 10);
        if (buffer.length < bodyStart + contentLength) {
            // Body not fully received yet; wait for more data.
            break;
        }
        const body = buffer.toString('utf-8', bodyStart, bodyStart + contentLength);
        buffer = buffer.subarray(bodyStart + contentLength);
        let request;
        try {
            request = JSON.parse(body);
        }
        catch (err) {
            sendError(null, -32700, 'Parse error');
            continue;
        }
        dispatchRequest(request);
    }
});
process.stderr.write('WebContext MCP Server started\n');
|
|
108
|
+
//# sourceMappingURL=mcp-server.js.map
|