@ignidor/web-search-mcp 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +267 -0
- package/bin/web-search-mcp.js +13 -0
- package/dist/crawl4ai-client.d.ts +238 -0
- package/dist/crawl4ai-client.d.ts.map +1 -0
- package/dist/crawl4ai-client.js +608 -0
- package/dist/crawl4ai-client.js.map +1 -0
- package/dist/index.d.ts +39 -0
- package/dist/index.d.ts.map +1 -0
- package/dist/index.js +561 -0
- package/dist/index.js.map +1 -0
- package/dist/playwright-crawler.d.ts +92 -0
- package/dist/playwright-crawler.d.ts.map +1 -0
- package/dist/playwright-crawler.js +454 -0
- package/dist/playwright-crawler.js.map +1 -0
- package/dist/ranking.d.ts +58 -0
- package/dist/ranking.d.ts.map +1 -0
- package/dist/ranking.js +218 -0
- package/dist/ranking.js.map +1 -0
- package/dist/search.d.ts +15 -0
- package/dist/search.d.ts.map +1 -0
- package/dist/search.js +187 -0
- package/dist/search.js.map +1 -0
- package/dist/types/index.d.ts +131 -0
- package/dist/types/index.d.ts.map +1 -0
- package/dist/types/index.js +3 -0
- package/dist/types/index.js.map +1 -0
- package/dist/utils/concurrency.d.ts +24 -0
- package/dist/utils/concurrency.d.ts.map +1 -0
- package/dist/utils/concurrency.js +53 -0
- package/dist/utils/concurrency.js.map +1 -0
- package/dist/utils/validators.d.ts +21 -0
- package/dist/utils/validators.d.ts.map +1 -0
- package/dist/utils/validators.js +75 -0
- package/dist/utils/validators.js.map +1 -0
- package/package.json +77 -0
|
@@ -0,0 +1,92 @@
|
|
|
1
|
+
/**
 * Outcome of a single page crawl.
 *
 * Only `success` is guaranteed to be present; every other field is optional.
 */
export interface PlaywrightCrawlResult {
    /** Whether the crawl completed without an error. */
    success: boolean;
    /** URL that was crawled. */
    url?: string;
    /** Page content rendered as markdown. */
    markdown?: string;
    /** Title of the page. */
    title?: string;
    /** Number of words counted in the markdown. */
    wordCount?: number;
    /** Number of links found on the page, split by destination. */
    links?: {
        internal: number;
        external: number;
    };
    /** Image URLs collected from the page. */
    images?: string[];
    /** Message describing why the crawl failed. */
    error?: string;
    /** Elapsed time of the crawl in milliseconds. */
    durationMs?: number;
}
|
|
15
|
+
/** Optional settings accepted by the crawl functions. */
export interface PlaywrightCrawlOptions {
    /** Timeout in milliseconds for navigation and selector waits. */
    timeout?: number;
    /** CSS selector to wait for after navigation. */
    waitForSelector?: string;
    /** When true, image URLs are collected into the result. */
    includeImages?: boolean;
    /** JavaScript source evaluated in the page before content extraction. */
    executeJs?: string;
    /** When true, a screenshot of the page is taken during the crawl. */
    screenshot?: boolean;
}
|
|
22
|
+
/**
 * Crawls a URL using Playwright and returns markdown content.
 *
 * @param url - Address of the page to crawl.
 * @param options - Optional crawl settings.
 * @returns A promise for the crawl result; check `success` before reading the other fields.
 */
export declare function crawlWithPlaywright(
    url: string,
    options?: PlaywrightCrawlOptions
): Promise<PlaywrightCrawlResult>;
|
|
26
|
+
/**
 * Batch crawl multiple URLs with concurrency control.
 *
 * @param urls - Addresses to crawl.
 * @param options - Crawl settings applied to every URL, plus `concurrency`,
 *   the number of crawls allowed to run at the same time.
 * @returns A promise for the collected crawl results.
 */
export declare function batchCrawl(
    urls: string[],
    options?: PlaywrightCrawlOptions & {
        concurrency?: number;
    }
): Promise<PlaywrightCrawlResult[]>;
|
|
32
|
+
/**
 * Clean up resources.
 *
 * @returns A promise that settles once cleanup has finished.
 */
export declare function cleanup(): Promise<void>;
|
|
36
|
+
/** Outcome of a screenshot capture. */
export interface ScreenshotResult {
    /** Whether the capture completed without an error. */
    success: boolean;
    /** Image data encoded as base64. */
    base64?: string;
    /** File name generated for the capture. */
    path?: string;
    /** Message describing why the capture failed. */
    error?: string;
}
|
|
42
|
+
/** Outcome of a PDF generation. */
export interface PDFResult {
    /** Whether the PDF was generated without an error. */
    success: boolean;
    /** PDF data encoded as base64. */
    base64?: string;
    /** File name generated for the document. */
    path?: string;
    /** Message describing why generation failed. */
    error?: string;
}
|
|
48
|
+
/**
 * Capture a screenshot of a webpage.
 *
 * @param url - Address of the page to capture.
 * @param options - `waitFor`: extra delay before capturing;
 *   `fullPage`: capture the whole scrollable page instead of the viewport.
 * @returns A promise for the capture result.
 */
export declare function captureScreenshot(
    url: string,
    options?: {
        waitFor?: number;
        fullPage?: boolean;
    }
): Promise<ScreenshotResult>;
|
|
55
|
+
/**
 * Generate a PDF from a webpage.
 *
 * @param url - Address of the page to render.
 * @returns A promise for the generation result.
 */
export declare function generatePDF(url: string): Promise<PDFResult>;
|
|
59
|
+
/** Outcome of running scripts on a page. */
export interface JSExecutionResult {
    /** Whether the page was loaded and the scripts were processed. */
    success: boolean;
    /** One entry per script. */
    results?: unknown[];
    /** Message describing why execution failed. */
    error?: string;
}
|
|
64
|
+
/**
 * Execute custom JavaScript on a webpage.
 *
 * @param url - Address of the page to load.
 * @param scripts - JavaScript sources to evaluate in the page.
 * @returns A promise for the execution result.
 */
export declare function executeJS(
    url: string,
    scripts: string[]
): Promise<JSExecutionResult>;
|
|
68
|
+
/** Describes one value to pull out of each matched element. */
export interface StructuredField {
    /** Key under which the extracted value is stored. */
    name: string;
    /** CSS selector locating the value. */
    selector: string;
    /** What to read from the located element. */
    type: 'text' | 'html' | 'attribute';
    /** Attribute name to read; used with the `'attribute'` type. */
    attribute?: string;
}
|
|
74
|
+
/** Outcome of a selector-based extraction. */
export interface StructuredExtractionResult {
    /** Whether the extraction completed without an error. */
    success: boolean;
    /** Extracted records, keyed by field name. */
    data?: Record<string, unknown>[];
    /** Message describing why extraction failed. */
    error?: string;
}
|
|
79
|
+
/**
 * Extract structured data using CSS selectors.
 *
 * @param url - Address of the page to load.
 * @param baseSelector - CSS selector for the repeating container elements.
 * @param fields - Values to extract from each container.
 * @returns A promise for the extraction result.
 */
export declare function extractStructured(
    url: string,
    baseSelector: string,
    fields: StructuredField[]
): Promise<StructuredExtractionResult>;
|
|
83
|
+
/** Outcome of a regex-based extraction. */
export interface RegexExtractionResult {
    /** Whether the extraction completed without an error. */
    success: boolean;
    /** Matched strings grouped by pattern name. */
    matches?: Record<string, string[]>;
    /** Message describing why extraction failed. */
    error?: string;
}
|
|
88
|
+
/**
 * Extract data using regex patterns.
 *
 * @param url - Address of the page to load.
 * @param patterns - Names of built-in patterns to run.
 * @param customPatterns - Additional patterns as regex source strings, keyed by name.
 * @returns A promise for the extraction result.
 */
export declare function extractRegex(
    url: string,
    patterns?: string[],
    customPatterns?: Record<string, string>
): Promise<RegexExtractionResult>;
|
|
92
|
+
//# sourceMappingURL=playwright-crawler.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"playwright-crawler.d.ts","sourceRoot":"","sources":["../src/playwright-crawler.ts"],"names":[],"mappings":"AAQA,MAAM,WAAW,qBAAqB;IACpC,OAAO,EAAE,OAAO,CAAC;IACjB,GAAG,CAAC,EAAE,MAAM,CAAC;IACb,QAAQ,CAAC,EAAE,MAAM,CAAC;IAClB,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,KAAK,CAAC,EAAE;QAAE,QAAQ,EAAE,MAAM,CAAC;QAAC,QAAQ,EAAE,MAAM,CAAA;KAAE,CAAC;IAC/C,MAAM,CAAC,EAAE,MAAM,EAAE,CAAC;IAClB,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,UAAU,CAAC,EAAE,MAAM,CAAC;CACrB;AAED,MAAM,WAAW,sBAAsB;IACrC,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,eAAe,CAAC,EAAE,MAAM,CAAC;IACzB,aAAa,CAAC,EAAE,OAAO,CAAC;IACxB,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,UAAU,CAAC,EAAE,OAAO,CAAC;CACtB;AAiCD;;GAEG;AACH,wBAAsB,mBAAmB,CACvC,GAAG,EAAE,MAAM,EACX,OAAO,GAAE,sBAA2B,GACnC,OAAO,CAAC,qBAAqB,CAAC,CAwGhC;AA8ED;;GAEG;AACH,wBAAsB,UAAU,CAC9B,IAAI,EAAE,MAAM,EAAE,EACd,OAAO,GAAE,sBAAsB,GAAG;IAAE,WAAW,CAAC,EAAE,MAAM,CAAA;CAAO,GAC9D,OAAO,CAAC,qBAAqB,EAAE,CAAC,CAiBlC;AAED;;GAEG;AACH,wBAAsB,OAAO,kBAK5B;AAMD,MAAM,WAAW,gBAAgB;IAC/B,OAAO,EAAE,OAAO,CAAC;IACjB,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,IAAI,CAAC,EAAE,MAAM,CAAC;IACd,KAAK,CAAC,EAAE,MAAM,CAAC;CAChB;AAED,MAAM,WAAW,SAAS;IACxB,OAAO,EAAE,OAAO,CAAC;IACjB,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,IAAI,CAAC,EAAE,MAAM,CAAC;IACd,KAAK,CAAC,EAAE,MAAM,CAAC;CAChB;AAED;;GAEG;AACH,wBAAsB,iBAAiB,CACrC,GAAG,EAAE,MAAM,EACX,OAAO,GAAE;IAAE,OAAO,CAAC,EAAE,MAAM,CAAC;IAAC,QAAQ,CAAC,EAAE,OAAO,CAAA;CAAO,GACrD,OAAO,CAAC,gBAAgB,CAAC,CAqC3B;AAED;;GAEG;AACH,wBAAsB,WAAW,CAAC,GAAG,EAAE,MAAM,GAAG,OAAO,CAAC,SAAS,CAAC,CAgCjE;AAMD,MAAM,WAAW,iBAAiB;IAChC,OAAO,EAAE,OAAO,CAAC;IACjB,OAAO,CAAC,EAAE,OAAO,EAAE,CAAC;IACpB,KAAK,CAAC,EAAE,MAAM,CAAC;CAChB;AAED;;GAEG;AACH,wBAAsB,SAAS,CAC7B,GAAG,EAAE,MAAM,EACX,OAAO,EAAE,MAAM,EAAE,GAChB,OAAO,CAAC,iBAAiB,CAAC,CAkC5B;AAMD,MAAM,WAAW,eAAe;IAC9B,IAAI,EAAE,MAAM,CAAC;IACb,QAAQ,EAAE,MAAM,CAAC;IACjB,IAAI,EAAE,MAAM,GAAG,MAAM,GAAG,WAAW,CAAC;IACpC,SAAS,CAAC,EAAE,MAAM,CAAC;CACpB;AAED,MAAM,WAAW,0BAA0B;IACzC,OAAO,EAAE,OAAO,CAAC;IACjB,IAAI,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,EAAE,CAAC;IACjC
,KAAK,CAAC,EAAE,MAAM,CAAC;CAChB;AAED;;GAEG;AACH,wBAAsB,iBAAiB,CACrC,GAAG,EAAE,MAAM,EACX,YAAY,EAAE,MAAM,EACpB,MAAM,EAAE,eAAe,EAAE,GACxB,OAAO,CAAC,0BAA0B,CAAC,CAyDrC;AAMD,MAAM,WAAW,qBAAqB;IACpC,OAAO,EAAE,OAAO,CAAC;IACjB,OAAO,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,EAAE,CAAC,CAAC;IACnC,KAAK,CAAC,EAAE,MAAM,CAAC;CAChB;AA2BD;;GAEG;AACH,wBAAsB,YAAY,CAChC,GAAG,EAAE,MAAM,EACX,QAAQ,GAAE,MAAM,EAAO,EACvB,cAAc,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,GACtC,OAAO,CAAC,qBAAqB,CAAC,CA6DhC"}
|
|
@@ -0,0 +1,454 @@
|
|
|
1
|
+
// Direct Playwright Crawler - No Docker Required!
|
|
2
|
+
import * as cheerio from 'cheerio';
|
|
3
|
+
// ============================================================================
|
|
4
|
+
// Playwright Crawler
|
|
5
|
+
// ============================================================================
|
|
6
|
+
let playwrightInstance = null;
|
|
7
|
+
let browserInstance = null;
|
|
8
|
+
/**
 * Lazily loads the `playwright` module and caches it in `playwrightInstance`.
 *
 * @returns The imported Playwright module.
 * @throws {Error} When the dynamic import fails.
 */
async function getPlaywright() {
    if (!playwrightInstance) {
        try {
            // Imported on demand so Playwright is not a hard dependency.
            playwrightInstance = await import('playwright');
        }
        catch (error) {
            throw new Error('Playwright not installed. Install with: npm install playwright');
        }
    }
    return playwrightInstance;
}
|
|
20
|
+
// Launch that is currently in flight, shared by concurrent callers.
let browserLaunchPromise = null;
/**
 * Returns the shared headless Chromium instance, launching it on first use.
 *
 * Callers that arrive while the launch is still in progress wait for that same
 * launch instead of each starting (and then losing track of) their own browser.
 *
 * @returns The cached browser stored in `browserInstance`.
 * @throws Whatever `getPlaywright()` or `chromium.launch()` throws; a failed
 *   launch is not cached, so the next call tries again.
 */
async function getBrowser() {
    if (browserInstance) {
        return browserInstance;
    }
    if (!browserLaunchPromise) {
        browserLaunchPromise = (async () => {
            const pw = await getPlaywright();
            browserInstance = await pw.chromium.launch({
                headless: true,
                args: ['--no-sandbox', '--disable-setuid-sandbox']
            });
            return browserInstance;
        })().finally(() => {
            // Reset on success and on failure so a failed launch can be retried.
            browserLaunchPromise = null;
        });
    }
    return browserLaunchPromise;
}
|
|
30
|
+
/**
 * Crawls a URL using Playwright and returns markdown content.
 *
 * Loads the page in a fresh browser context, optionally waits for a selector
 * and evaluates caller-supplied JavaScript, then converts the rendered HTML to
 * markdown. Failures are reported through the result (`success: false` plus
 * `error`) rather than thrown, and the context is closed on every path.
 *
 * @param url - Address of the page to crawl.
 * @param options - Optional crawl settings; `timeout` falls back to 30000 ms
 *   when it is missing or 0.
 * @returns The crawl result; `url` and `durationMs` are set on success and on failure.
 */
export async function crawlWithPlaywright(url, options = {}) {
    const startTime = Date.now();
    let context = null;
    try {
        const browser = await getBrowser();
        context = await browser.newContext({
            userAgent: 'Mozilla/5.0 (compatible; ClaudeMCP-Bot/1.0)'
        });
        const page = await context.newPage();
        const timeout = options.timeout || 30000;
        page.setDefaultTimeout(timeout);
        await page.goto(url, { waitUntil: 'domcontentloaded', timeout });
        if (options.waitForSelector) {
            await page.waitForSelector(options.waitForSelector, { timeout });
        }
        if (options.executeJs) {
            // NOTE(review): evaluates a caller-supplied script string inside the page.
            await page.evaluate(options.executeJs);
        }
        const content = await page.evaluate(() => ({
            title: document.title,
            html: document.documentElement.outerHTML
        }));
        // A link is internal when it resolves to the page's own protocol and host,
        // so relative and fragment links count as internal while protocol-relative
        // links and look-alike hosts do not.
        const links = await page.evaluate(() => {
            let internal = 0;
            let external = 0;
            for (const anchor of Array.from(document.querySelectorAll('a[href]'))) {
                const href = anchor.getAttribute('href');
                if (!href) {
                    continue;
                }
                let sameSite = false;
                try {
                    const target = new URL(href, document.baseURI);
                    sameSite = target.protocol === window.location.protocol
                        && target.host === window.location.host;
                }
                catch {
                    // An href that cannot be parsed is counted as external.
                    sameSite = false;
                }
                if (sameSite) {
                    internal++;
                }
                else {
                    external++;
                }
            }
            return { internal, external };
        });
        let images;
        if (options.includeImages) {
            images = await page.evaluate(() => {
                const imgs = Array.from(document.querySelectorAll('img[src]'));
                return imgs.map((img) => img.src).filter((src) => src && !src.startsWith('data:'));
            });
        }
        const $ = cheerio.load(content.html);
        // Drop non-content elements before converting to markdown.
        $('script, style, nav, footer, iframe').remove();
        const markdown = htmlToMarkdown($, content.title);
        if (options.screenshot) {
            // The file is written relative to the current working directory.
            await page.screenshot({ path: `screenshot-${Date.now()}.png`, fullPage: false });
        }
        return {
            success: true,
            url,
            title: content.title,
            markdown,
            // filter(Boolean) drops the empty tokens produced by leading/trailing whitespace.
            wordCount: markdown.split(/\s+/).filter(Boolean).length,
            links,
            images,
            durationMs: Date.now() - startTime
        };
    }
    catch (error) {
        return {
            success: false,
            url,
            error: error instanceof Error ? error.message : String(error),
            durationMs: Date.now() - startTime
        };
    }
    finally {
        if (context) {
            // The result is already decided; a close failure must not replace it
            // or make this function reject.
            await context.close().catch(() => undefined);
        }
    }
}
|
|
121
|
+
/**
 * Converts HTML to markdown format.
 *
 * Content is emitted grouped by element kind, in this order: headings,
 * paragraphs, lists, code blocks, links, bold text, italic text. It is not
 * emitted in document order. Elements whose text is empty are skipped. When
 * nothing is emitted, the body text is used instead, split into paragraphs on
 * blank lines.
 *
 * @param $ - Cheerio instance loaded with the document.
 * @param title - Page title, written as the top-level heading.
 * @returns The markdown text.
 */
function htmlToMarkdown($, title) {
    const header = `# ${title}\n\n`;
    let markdown = header;
    // Writes each snippet on its own line and ends the group with a blank line,
    // so adjacent links or emphasis markers never run into each other.
    const appendLines = (lines) => {
        if (lines.length > 0) {
            markdown += `${lines.join('\n')}\n\n`;
        }
    };
    // Collects the trimmed, non-empty text of every match, formatted by `format`.
    const collect = (selector, format) => {
        const lines = [];
        $(selector).each((_, el) => {
            const text = $(el).text().trim();
            if (text) {
                lines.push(format(text));
            }
        });
        return lines;
    };
    $('h1, h2, h3, h4, h5, h6').each((_, el) => {
        const text = $(el).text().trim();
        if (text) {
            // The digit in the tag name (h1..h6) is the heading level.
            const level = Number.parseInt(el.tagName.charAt(1), 10);
            markdown += `${'#'.repeat(level)} ${text}\n\n`;
        }
    });
    $('p').each((_, el) => {
        const text = $(el).text().trim();
        if (text) {
            markdown += `${text}\n\n`;
        }
    });
    $('ul, ol').each((_, el) => {
        const isOrdered = el.tagName === 'ol';
        const lines = [];
        $(el).find('li').each((_index, li) => {
            const text = $(li).text().trim();
            if (text) {
                // Number by emitted position so skipped items leave no gaps.
                const prefix = isOrdered ? `${lines.length + 1}.` : '-';
                lines.push(`${prefix} ${text}`);
            }
        });
        appendLines(lines);
    });
    $('pre').each((_, el) => {
        const block = $(el);
        // Fall back to the <pre> text when there is no nested <code> element.
        const code = (block.find('code').text() || block.text()).trim();
        if (code) {
            markdown += `\`\`\`\n${code}\n\`\`\`\n\n`;
        }
    });
    const linkLines = [];
    $('a[href]').each((_, el) => {
        const href = $(el).attr('href');
        const text = $(el).text().trim();
        if (href && text) {
            linkLines.push(`[${text}](${href})`);
        }
    });
    appendLines(linkLines);
    appendLines(collect('strong, b', (text) => `**${text}**`));
    appendLines(collect('em, i', (text) => `*${text}*`));
    // If no structured content was found, fall back to body text.
    if (markdown === header) {
        for (const paragraph of $('body').text().trim().split(/\n\n+/)) {
            const text = paragraph.trim();
            if (text) {
                markdown += `${text}\n\n`;
            }
        }
    }
    return markdown;
}
|
|
186
|
+
/**
 * Batch crawl multiple URLs with concurrency control.
 *
 * Results are returned in the same order as `urls`, regardless of which crawl
 * finishes first, so `results[i]` always belongs to `urls[i]`.
 *
 * @param urls - Addresses to crawl.
 * @param options - Crawl settings passed to every crawl, plus `concurrency`
 *   (default 3), the number of crawls allowed to run at once. Values below 1
 *   or not numeric are treated as 1 so every URL is still crawled.
 * @returns One crawl result per input URL.
 */
export async function batchCrawl(urls, options = {}) {
    const { concurrency = 3, ...crawlOptions } = options;
    // Snapshot the input so later changes by the caller cannot affect the run.
    const pending = [...urls];
    const results = new Array(pending.length);
    const limit = Number(concurrency);
    const workerCount = Math.min(limit >= 1 ? Math.floor(limit) : 1, pending.length);
    let cursor = 0;
    const worker = async () => {
        while (cursor < pending.length) {
            // Claim the index before awaiting so no two workers take the same URL.
            const index = cursor++;
            results[index] = await crawlWithPlaywright(pending[index], crawlOptions);
        }
    };
    await Promise.all(Array.from({ length: workerCount }, () => worker()));
    return results;
}
|
|
203
|
+
/**
 * Clean up resources.
 *
 * Closes the cached browser, if one exists. The cached reference is cleared
 * before closing, so it is dropped even when `close()` rejects and is never
 * closed twice by overlapping calls.
 *
 * @returns A promise that resolves once the browser is closed.
 * @throws Whatever the browser's `close()` rejects with.
 */
export async function cleanup() {
    const browser = browserInstance;
    if (!browser) {
        return;
    }
    browserInstance = null;
    await browser.close();
}
|
|
212
|
+
/**
 * Capture a screenshot of a webpage.
 *
 * Failures are reported through the result (`success: false` plus `error`)
 * rather than thrown, and the browser context is closed on every path.
 *
 * @param url - Address of the page to capture.
 * @param options - `waitFor`: extra delay in seconds after the page has loaded;
 *   `fullPage`: capture the whole scrollable page (defaults to false).
 * @returns The PNG as base64 plus a generated file name. This function does not
 *   write the image to disk.
 */
export async function captureScreenshot(url, options = {}) {
    let context = null;
    try {
        const browser = await getBrowser();
        context = await browser.newContext({
            userAgent: 'Mozilla/5.0 (compatible; ClaudeMCP-Bot/1.0)'
        });
        const page = await context.newPage();
        await page.goto(url, { waitUntil: 'networkidle' });
        if (options.waitFor) {
            // waitFor is given in seconds; Playwright expects milliseconds.
            await page.waitForTimeout(options.waitFor * 1000);
        }
        const buffer = await page.screenshot({
            fullPage: options.fullPage || false,
            type: 'png'
        });
        return {
            success: true,
            base64: buffer.toString('base64'),
            path: `screenshot-${Date.now()}.png`
        };
    }
    catch (error) {
        return {
            success: false,
            error: error instanceof Error ? error.message : String(error)
        };
    }
    finally {
        if (context) {
            // The result is already decided; a close failure must not replace it.
            await context.close().catch(() => undefined);
        }
    }
}
|
|
248
|
+
/**
 * Generate a PDF from a webpage.
 *
 * Failures are reported through the result (`success: false` plus `error`)
 * rather than thrown, and the browser context is closed on every path.
 *
 * @param url - Address of the page to render.
 * @returns The A4 PDF as base64 plus a generated file name. This function does
 *   not write the document to disk.
 */
export async function generatePDF(url) {
    let context = null;
    try {
        const browser = await getBrowser();
        context = await browser.newContext({
            userAgent: 'Mozilla/5.0 (compatible; ClaudeMCP-Bot/1.0)'
        });
        const page = await context.newPage();
        await page.goto(url, { waitUntil: 'networkidle' });
        const buffer = await page.pdf({
            format: 'A4',
            printBackground: true
        });
        return {
            success: true,
            base64: buffer.toString('base64'),
            path: `output-${Date.now()}.pdf`
        };
    }
    catch (error) {
        return {
            success: false,
            error: error instanceof Error ? error.message : String(error)
        };
    }
    finally {
        if (context) {
            // The result is already decided; a close failure must not replace it.
            await context.close().catch(() => undefined);
        }
    }
}
|
|
280
|
+
/**
 * Execute custom JavaScript on a webpage.
 *
 * Scripts run one after another in the same page. A script that throws does
 * not stop the run: its slot in `results` holds `{ error: message }` instead.
 * Page-level failures (such as navigation) are reported as `success: false`.
 * The browser context is closed on every path.
 *
 * @param url - Address of the page to load.
 * @param scripts - JavaScript sources to evaluate in the page.
 * @returns One entry in `results` per script, in the order given.
 */
export async function executeJS(url, scripts) {
    let context = null;
    try {
        const browser = await getBrowser();
        context = await browser.newContext({
            userAgent: 'Mozilla/5.0 (compatible; ClaudeMCP-Bot/1.0)'
        });
        const page = await context.newPage();
        await page.goto(url, { waitUntil: 'domcontentloaded' });
        const results = [];
        for (const script of scripts) {
            try {
                // NOTE(review): evaluates caller-supplied script strings inside the page.
                results.push(await page.evaluate(script));
            }
            catch (e) {
                results.push({ error: e instanceof Error ? e.message : String(e) });
            }
        }
        return {
            success: true,
            results
        };
    }
    catch (error) {
        return {
            success: false,
            error: error instanceof Error ? error.message : String(error)
        };
    }
    finally {
        if (context) {
            // The result is already decided; a close failure must not replace it.
            await context.close().catch(() => undefined);
        }
    }
}
|
|
315
|
+
/**
 * Extract structured data using CSS selectors.
 *
 * Each element matching `baseSelector` produces one record. For every field,
 * the first descendant matching the field's selector is read according to the
 * field type. A field with no matching element, or with an unrecognised type,
 * is recorded as `null`. Failures are reported as `success: false`, and the
 * browser context is closed on every path.
 *
 * @param url - Address of the page to load.
 * @param baseSelector - CSS selector for the repeating container elements.
 * @param fields - Values to extract from each container.
 * @returns One record per container in `data`, keyed by field name.
 */
export async function extractStructured(url, baseSelector, fields) {
    let context = null;
    try {
        const browser = await getBrowser();
        context = await browser.newContext({
            userAgent: 'Mozilla/5.0 (compatible; ClaudeMCP-Bot/1.0)'
        });
        const page = await context.newPage();
        await page.goto(url, { waitUntil: 'domcontentloaded' });
        // This callback runs inside the page, so it can only use its argument.
        const data = await page.evaluate((args) => {
            const { baseSel, fieldList } = args;
            return Array.from(document.querySelectorAll(baseSel)).map((el) => {
                const item = {};
                for (const field of fieldList) {
                    const targetEl = el.querySelector(field.selector);
                    let value = null;
                    if (targetEl) {
                        switch (field.type) {
                            case 'text':
                                value = targetEl.textContent?.trim() || '';
                                break;
                            case 'html':
                                value = targetEl.innerHTML;
                                break;
                            case 'attribute':
                                value = targetEl.getAttribute(field.attribute || '') || '';
                                break;
                        }
                    }
                    item[field.name] = value;
                }
                return item;
            });
        }, { baseSel: baseSelector, fieldList: fields });
        return {
            success: true,
            data: data
        };
    }
    catch (error) {
        return {
            success: false,
            error: error instanceof Error ? error.message : String(error)
        };
    }
    finally {
        if (context) {
            // The result is already decided; a close failure must not replace it.
            await context.close().catch(() => undefined);
        }
    }
}
|
|
369
|
+
// Built-in regex patterns, keyed by the name callers use to select them.
// Every pattern carries the `g` flag so String.prototype.match returns all
// occurrences. The patterns are loose text scanners, not validators: for
// example `ipv4` accepts any 1-3 digit groups and `credit_card` accepts any run
// of 13-16 digits.
const REGEX_PATTERNS = {
    email: /[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}/g,
    phone_intl: /[\+]?[(]?[0-9]{3}[)]?[-\s\.]?[0-9]{3}[-\s\.]?[0-9]{4,6}/g,
    phone_us: /\b\d{3}[-.]?\d{3}[-.]?\d{4}\b/g,
    url: /https?:\/\/[^\s<>{}|\\^`\[\]]+/g,
    ipv4: /\b(?:\d{1,3}\.){3}\d{1,3}\b/g,
    ipv6: /(([0-9a-fA-F]{1,4}:){7,7}[0-9a-fA-F]{1,4}|([0-9a-fA-F]{1,4}:){1,7}:|([0-9a-fA-F]{1,4}:){1,6}:[0-9a-fA-F]{1,4}|([0-9a-fA-F]{1,4}:){1,5}(:[0-9a-fA-F]{1,4}){1,2}|([0-9a-fA-F]{1,4}:){1,4}(:[0-9a-fA-F]{1,4}){1,3}|([0-9a-fA-F]{1,4}:){1,3}(:[0-9a-fA-F]{1,4}){1,4}|([0-9a-fA-F]{1,4}:){1,2}(:[0-9a-fA-F]{1,4}){1,5}|[0-9a-fA-F]{1,4}:((:[0-9a-fA-F]{1,4}){1,6})|:((:[0-9a-fA-F]{1,4}){1,7}|:)|fe80:(:[0-9a-fA-F]{0,4}){0,4}%[0-9a-zA-Z]{1,}|::(ffff(:0{1,4}){0,1}:){0,1}((25[0-5]|(2[0-4]|1{0,1}[0-9]){0,1}[0-9])\.){3,3}(25[0-5]|(2[0-4]|1{0,1}[0-9]){0,1}[0-9])|([0-9a-fA-F]{1,4}:){1,4}:((25[0-5]|(2[0-4]|1{0,1}[0-9]){0,1}[0-9])\.){3,3}(25[0-5]|(2[0-4]|1{0,1}[0-9]){0,1}[0-9]))\b/g,
    uuid: /[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}/gi,
    currency: /[$€£¥₹]\s?\d+(?:,\d{3})*(?:\.\d{2})?|\d+(?:,\d{3})*(?:\.\d{2})?\s?[$€£¥₹]/g,
    // No trailing \b: "%" is not a word character, so "%\b" would only match
    // when a letter or digit follows the sign and would miss "50%" before a
    // space, punctuation, or the end of the text.
    percentage: /\b\d+(?:\.\d+)?%/g,
    number: /\b\d+(?:,\d{3})*(?:\.\d+)?\b/g,
    date_iso: /\b\d{4}-\d{2}-\d{2}\b/g,
    date_us: /\b(?:0[1-9]|1[0-2])\/(?:0[1-9]|[12][0-9]|3[01])\/\d{4}\b/g,
    time_24h: /\b([01]?[0-9]|2[0-3]):[0-5][0-9]\b/g,
    postal_us: /\b\d{5}(-\d{4})?\b/g,
    postal_uk: /[A-Z]{1,2}\d[A-Z\d]? ?\d[A-Z]{2}/gi,
    hex_color: /#([0-9a-fA-F]{3}|[0-9a-fA-F]{6})\b/g,
    twitter_handle: /@[\w]{1,15}\b/g,
    hashtag: /#[\w]+/g,
    mac_addr: /([0-9A-Fa-f]{2}[:-]){5}([0-9A-Fa-f]{2})/g,
    iban: /[A-Z]{2}\d{2}[A-Z0-9]{11,30}/g,
    credit_card: /\b(?:\d[ -]*?){13,16}\b/g
};
|
|
393
|
+
/**
 * Extract data using regex patterns.
 *
 * Reads the page's visible text (`document.body.innerText`) and runs the
 * requested patterns against it. Matches are de-duplicated per pattern, and
 * patterns with no match are left out of the result. Failures are reported as
 * `success: false`, and the browser context is closed on every path.
 *
 * @param url - Address of the page to load.
 * @param patterns - Names of built-in patterns to run; `'all'` runs every
 *   built-in pattern and unknown names are ignored.
 * @param customPatterns - Extra patterns as regex source strings, keyed by
 *   result name. Invalid sources are skipped. A custom name that equals a
 *   built-in name replaces that built-in's matches.
 * @returns Matched strings grouped by pattern name.
 */
export async function extractRegex(url, patterns = [], customPatterns) {
    let context = null;
    try {
        const browser = await getBrowser();
        context = await browser.newContext({
            userAgent: 'Mozilla/5.0 (compatible; ClaudeMCP-Bot/1.0)'
        });
        const page = await context.newPage();
        await page.goto(url, { waitUntil: 'domcontentloaded' });
        const text = await page.evaluate(() => document.body.innerText);
        const matches = {};
        // Stores the de-duplicated matches of `regex` under `name`, if any.
        const record = (name, regex) => {
            const found = text.match(regex);
            if (found) {
                matches[name] = [...new Set(found)];
            }
        };
        for (const patternName of patterns) {
            if (patternName === 'all') {
                for (const [name, regex] of Object.entries(REGEX_PATTERNS)) {
                    record(name, regex);
                }
            }
            else if (Object.prototype.hasOwnProperty.call(REGEX_PATTERNS, patternName)) {
                // Own-property check so names such as "constructor" are not
                // resolved through the prototype chain.
                record(patternName, REGEX_PATTERNS[patternName]);
            }
        }
        if (customPatterns) {
            for (const [name, pattern] of Object.entries(customPatterns)) {
                let regex;
                try {
                    // NOTE(review): caller-supplied patterns run unbounded against the
                    // page text; a pathological pattern can stall the process.
                    regex = new RegExp(pattern, 'g');
                }
                catch {
                    // Best effort: skip patterns that are not valid regex source.
                    continue;
                }
                record(name, regex);
            }
        }
        return {
            success: true,
            matches
        };
    }
    catch (error) {
        return {
            success: false,
            error: error instanceof Error ? error.message : String(error)
        };
    }
    finally {
        if (context) {
            // The result is already decided; a close failure must not replace it.
            await context.close().catch(() => undefined);
        }
    }
}
|
|
454
|
+
//# sourceMappingURL=playwright-crawler.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"playwright-crawler.js","sourceRoot":"","sources":["../src/playwright-crawler.ts"],"names":[],"mappings":"AAAA,kDAAkD;AAClD,OAAO,KAAK,OAAO,MAAM,SAAS,CAAC;AA2BnC,+EAA+E;AAC/E,qBAAqB;AACrB,+EAA+E;AAE/E,IAAI,kBAAkB,GAAQ,IAAI,CAAC;AACnC,IAAI,eAAe,GAAQ,IAAI,CAAC;AAEhC,KAAK,UAAU,aAAa;IAC1B,IAAI,kBAAkB;QAAE,OAAO,kBAAkB,CAAC;IAElD,IAAI,CAAC;QACH,0CAA0C;QAC1C,kBAAkB,GAAG,MAAM,MAAM,CAAC,YAAY,CAAC,CAAC;QAChD,OAAO,kBAAkB,CAAC;IAC5B,CAAC;IAAC,OAAO,KAAK,EAAE,CAAC;QACf,MAAM,IAAI,KAAK,CAAC,gEAAgE,CAAC,CAAC;IACpF,CAAC;AACH,CAAC;AAED,KAAK,UAAU,UAAU;IACvB,IAAI,eAAe;QAAE,OAAO,eAAe,CAAC;IAE5C,MAAM,EAAE,GAAG,MAAM,aAAa,EAAE,CAAC;IACjC,eAAe,GAAG,MAAM,EAAE,CAAC,QAAQ,CAAC,MAAM,CAAC;QACzC,QAAQ,EAAE,IAAI;QACd,IAAI,EAAE,CAAC,cAAc,EAAE,0BAA0B,CAAC;KACnD,CAAC,CAAC;IAEH,OAAO,eAAe,CAAC;AACzB,CAAC;AAED;;GAEG;AACH,MAAM,CAAC,KAAK,UAAU,mBAAmB,CACvC,GAAW,EACX,UAAkC,EAAE;IAEpC,MAAM,SAAS,GAAG,IAAI,CAAC,GAAG,EAAE,CAAC;IAE7B,IAAI,CAAC;QACH,MAAM,EAAE,GAAG,MAAM,aAAa,EAAE,CAAC;QACjC,MAAM,OAAO,GAAG,MAAM,UAAU,EAAE,CAAC;QACnC,MAAM,OAAO,GAAG,MAAM,OAAO,CAAC,UAAU,CAAC;YACvC,SAAS,EAAE,6CAA6C;SACzD,CAAC,CAAC;QACH,MAAM,IAAI,GAAG,MAAM,OAAO,CAAC,OAAO,EAAE,CAAC;QAErC,cAAc;QACd,MAAM,OAAO,GAAG,OAAO,CAAC,OAAO,IAAI,KAAK,CAAC;QACzC,IAAI,CAAC,iBAAiB,CAAC,OAAO,CAAC,CAAC;QAEhC,kBAAkB;QAClB,MAAM,IAAI,CAAC,IAAI,CAAC,GAAG,EAAE,EAAE,SAAS,EAAE,kBAAkB,EAAE,OAAO,EAAE,CAAC,CAAC;QAEjE,iCAAiC;QACjC,IAAI,OAAO,CAAC,eAAe,EAAE,CAAC;YAC5B,MAAM,IAAI,CAAC,eAAe,CAAC,OAAO,CAAC,eAAe,EAAE,EAAE,OAAO,EAAE,CAAC,CAAC;QACnE,CAAC;QAED,yCAAyC;QACzC,IAAI,OAAO,CAAC,SAAS,EAAE,CAAC;YACtB,MAAM,IAAI,CAAC,QAAQ,CAAC,OAAO,CAAC,SAAS,CAAC,CAAC;QACzC,CAAC;QAED,kBAAkB;QAClB,MAAM,OAAO,GAAG,MAAM,IAAI,CAAC,QAAQ,CAAC,GAAG,EAAE;YACvC,OAAO;gBACL,KAAK,EAAG,QAAgB,CAAC,KAAK;gBAC9B,IAAI,EAAG,QAAgB,CAAC,eAAe,CAAC,SAAS;gBACjD,IAAI,EAAG,QAAgB,CAAC,IAAI,CAAC,SAAS;aACvC,CAAC;QACJ,CAAC,CAAC,CAAC;QAEH,sBAAsB;QACtB,MAAM,KAAK,GAAG,MAAM,IAAI,CAAC,QAAQ,CAAC,GAAG,EAAE;YACrC,MAAM,OAAO,GAAG,KAAK,CAAC,IAAI,CAAE,QAAgB,CAAC,gBAAgB,CAAC,SAAS,CAAC,CAAC,CAAC;YAC1E,IAAI,QAAQ,GAAG,
CAAC,CAAC;YACjB,IAAI,QAAQ,GAAG,CAAC,CAAC;YAEjB,OAAO,CAAC,OAAO,CAAC,CAAC,MAAW,EAAE,EAAE;gBAC9B,MAAM,IAAI,GAAG,MAAM,CAAC,YAAY,CAAC,MAAM,CAAC,CAAC;gBACzC,IAAI,IAAI,EAAE,CAAC;oBACT,IAAI,IAAI,CAAC,UAAU,CAAC,GAAG,CAAC,IAAI,IAAI,CAAC,UAAU,CAAC,MAAM,CAAC,QAAQ,CAAC,MAAM,CAAC,EAAE,CAAC;wBACpE,QAAQ,EAAE,CAAC;oBACb,CAAC;yBAAM,CAAC;wBACN,QAAQ,EAAE,CAAC;oBACb,CAAC;gBACH,CAAC;YACH,CAAC,CAAC,CAAC;YAEH,OAAO,EAAE,QAAQ,EAAE,QAAQ,EAAE,CAAC;QAChC,CAAC,CAAC,CAAC;QAEH,8BAA8B;QAC9B,IAAI,MAAM,GAAa,EAAE,CAAC;QAC1B,IAAI,OAAO,CAAC,aAAa,EAAE,CAAC;YAC1B,MAAM,GAAG,MAAM,IAAI,CAAC,QAAQ,CAAC,GAAG,EAAE;gBAChC,MAAM,IAAI,GAAG,KAAK,CAAC,IAAI,CAAE,QAAgB,CAAC,gBAAgB,CAAC,UAAU,CAAC,CAAC,CAAC;gBACxE,OAAO,IAAI,CAAC,GAAG,CAAC,CAAC,GAAQ,EAAE,EAAE,CAAC,GAAG,CAAC,GAAG,CAAC,CAAC,MAAM,CAAC,CAAC,GAAW,EAAE,EAAE,CAAC,GAAG,IAAI,CAAC,GAAG,CAAC,UAAU,CAAC,OAAO,CAAC,CAAC,CAAC;YAClG,CAAC,CAAC,CAAC;QACL,CAAC;QAED,8BAA8B;QAC9B,MAAM,CAAC,GAAG,OAAO,CAAC,IAAI,CAAC,OAAO,CAAC,IAAI,CAAC,CAAC;QAErC,mCAAmC;QACnC,CAAC,CAAC,oCAAoC,CAAC,CAAC,MAAM,EAAE,CAAC;QAEjD,sBAAsB;QACtB,MAAM,QAAQ,GAAG,cAAc,CAAC,CAAC,EAAE,OAAO,CAAC,KAAK,CAAC,CAAC;QAElD,+BAA+B;QAC/B,IAAI,OAAO,CAAC,UAAU,EAAE,CAAC;YACvB,MAAM,IAAI,CAAC,UAAU,CAAC,EAAE,IAAI,EAAE,cAAc,IAAI,CAAC,GAAG,EAAE,MAAM,EAAE,QAAQ,EAAE,KAAK,EAAE,CAAC,CAAC;QACnF,CAAC;QAED,MAAM,OAAO,CAAC,KAAK,EAAE,CAAC;QAEtB,MAAM,UAAU,GAAG,IAAI,CAAC,GAAG,EAAE,GAAG,SAAS,CAAC;QAE1C,OAAO;YACL,OAAO,EAAE,IAAI;YACb,GAAG;YACH,KAAK,EAAE,OAAO,CAAC,KAAK;YACpB,QAAQ;YACR,SAAS,EAAE,QAAQ,CAAC,KAAK,CAAC,KAAK,CAAC,CAAC,MAAM;YACvC,KAAK;YACL,MAAM,EAAE,OAAO,CAAC,aAAa,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,SAAS;YAClD,UAAU;SACX,CAAC;IAEJ,CAAC;IAAC,OAAO,KAAK,EAAE,CAAC;QACf,MAAM,UAAU,GAAG,IAAI,CAAC,GAAG,EAAE,GAAG,SAAS,CAAC;QAE1C,OAAO;YACL,OAAO,EAAE,KAAK;YACd,KAAK,EAAE,KAAK,YAAY,KAAK,CAAC,CAAC,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM,CAAC,KAAK,CAAC;YAC7D,UAAU;SACX,CAAC;IACJ,CAAC;AACH,CAAC;AAED;;GAEG;AACH,SAAS,cAAc,CAAC,CAAkC,EAAE,KAAa;IACvE,IAAI,QAAQ,GAAG,KAAK,KAAK,MAAM,CAAC;IAEhC,mBAAmB;IACnB,CAAC,CAAC,wBAAwB,CAAC,CAAC,IAAI,CAAC,CAAC,CAAS,EAAE,EA
AO,EAAE,EAAE;QACtD,MAAM,OAAO,GAAG,EAAE,CAAC,OAAO,CAAC;QAC3B,MAAM,KAAK,GAAG,OAAO,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC;QAChC,MAAM,IAAI,GAAG,CAAC,CAAC,EAAE,CAAC,CAAC,IAAI,EAAE,CAAC,IAAI,EAAE,CAAC;QACjC,QAAQ,IAAI,GAAG,GAAG,CAAC,MAAM,CAAC,QAAQ,CAAC,KAAK,CAAC,CAAC,IAAI,IAAI,MAAM,CAAC;IAC3D,CAAC,CAAC,CAAC;IAEH,qBAAqB;IACrB,CAAC,CAAC,GAAG,CAAC,CAAC,IAAI,CAAC,CAAC,CAAS,EAAE,EAAO,EAAE,EAAE;QACjC,MAAM,IAAI,GAAG,CAAC,CAAC,EAAE,CAAC,CAAC,IAAI,EAAE,CAAC,IAAI,EAAE,CAAC;QACjC,IAAI,IAAI,EAAE,CAAC;YACT,QAAQ,IAAI,GAAG,IAAI,MAAM,CAAC;QAC5B,CAAC;IACH,CAAC,CAAC,CAAC;IAEH,gBAAgB;IAChB,CAAC,CAAC,QAAQ,CAAC,CAAC,IAAI,CAAC,CAAC,CAAS,EAAE,EAAO,EAAE,EAAE;QACtC,MAAM,SAAS,GAAG,EAAE,CAAC,OAAO,KAAK,IAAI,CAAC;QACtC,MAAM,KAAK,GAAG,CAAC,CAAC,EAAE,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;QAE/B,KAAK,CAAC,IAAI,CAAC,CAAC,KAAa,EAAE,CAAM,EAAE,EAAE;YACnC,MAAM,IAAI,GAAG,CAAC,CAAC,EAAE,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC,EAAE,CAAC,KAAK,CAAC,CAAC,IAAI,EAAE,CAAC,IAAI,EAAE,CAAC;YACtD,MAAM,MAAM,GAAG,SAAS,CAAC,CAAC,CAAC,GAAG,KAAK,GAAG,CAAC,GAAG,CAAC,CAAC,CAAC,GAAG,CAAC;YACjD,QAAQ,IAAI,GAAG,MAAM,IAAI,IAAI,IAAI,CAAC;QACpC,CAAC,CAAC,CAAC;QACH,QAAQ,IAAI,IAAI,CAAC;IACnB,CAAC,CAAC,CAAC;IAEH,sBAAsB;IACtB,CAAC,CAAC,KAAK,CAAC,CAAC,IAAI,CAAC,CAAC,CAAS,EAAE,EAAO,EAAE,EAAE;QACnC,MAAM,IAAI,GAAG,CAAC,CAAC,EAAE,CAAC,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC,IAAI,EAAE,CAAC,IAAI,EAAE,CAAC;QAC9C,QAAQ,IAAI,WAAW,IAAI,cAAc,CAAC;IAC5C,CAAC,CAAC,CAAC;IAEH,gBAAgB;IAChB,CAAC,CAAC,SAAS,CAAC,CAAC,IAAI,CAAC,CAAC,CAAS,EAAE,EAAO,EAAE,EAAE;QACvC,MAAM,IAAI,GAAG,CAAC,CAAC,EAAE,CAAC,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC;QAChC,MAAM,IAAI,GAAG,CAAC,CAAC,EAAE,CAAC,CAAC,IAAI,EAAE,CAAC,IAAI,EAAE,CAAC;QACjC,IAAI,IAAI,IAAI,IAAI,EAAE,CAAC;YACjB,QAAQ,IAAI,IAAI,IAAI,KAAK,IAAI,GAAG,CAAC;QACnC,CAAC;IACH,CAAC,CAAC,CAAC;IAEH,0BAA0B;IAC1B,CAAC,CAAC,WAAW,CAAC,CAAC,IAAI,CAAC,CAAC,CAAS,EAAE,EAAO,EAAE,EAAE;QACzC,MAAM,IAAI,GAAG,CAAC,CAAC,EAAE,CAAC,CAAC,IAAI,EAAE,CAAC,IAAI,EAAE,CAAC;QACjC,QAAQ,IAAI,KAAK,IAAI,IAAI,CAAC;IAC5B,CAAC,CAAC,CAAC;IAEH,CAAC,CAAC,OAAO,CAAC,CAAC,IAAI,CAAC,CAAC,CAAS,EAAE,EAAO,EAAE,
EAAE;QACrC,MAAM,IAAI,GAAG,CAAC,CAAC,EAAE,CAAC,CAAC,IAAI,EAAE,CAAC,IAAI,EAAE,CAAC;QACjC,QAAQ,IAAI,IAAI,IAAI,GAAG,CAAC;IAC1B,CAAC,CAAC,CAAC;IAEH,6DAA6D;IAC7D,IAAI,QAAQ,KAAK,KAAK,KAAK,MAAM,EAAE,CAAC;QAClC,MAAM,QAAQ,GAAG,CAAC,CAAC,MAAM,CAAC,CAAC,IAAI,EAAE,CAAC,IAAI,EAAE,CAAC;QACzC,MAAM,UAAU,GAAG,QAAQ,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC;QAC3C,QAAQ,GAAG,KAAK,KAAK,MAAM,CAAC;QAC5B,UAAU,CAAC,OAAO,CAAC,CAAC,CAAS,EAAE,EAAE;YAC/B,IAAI,CAAC,CAAC,IAAI,EAAE,EAAE,CAAC;gBACb,QAAQ,IAAI,GAAG,CAAC,CAAC,IAAI,EAAE,MAAM,CAAC;YAChC,CAAC;QACH,CAAC,CAAC,CAAC;IACL,CAAC;IAED,OAAO,QAAQ,CAAC;AAClB,CAAC;AAED;;GAEG;AACH,MAAM,CAAC,KAAK,UAAU,UAAU,CAC9B,IAAc,EACd,UAA6D,EAAE;IAE/D,MAAM,EAAE,WAAW,GAAG,CAAC,EAAE,GAAG,YAAY,EAAE,GAAG,OAAO,CAAC;IAErD,MAAM,OAAO,GAA4B,EAAE,CAAC;IAC5C,MAAM,KAAK,GAAG,CAAC,GAAG,IAAI,CAAC,CAAC;IAExB,MAAM,OAAO,GAAG,KAAK,CAAC,IAAI,CAAC,EAAE,MAAM,EAAE,IAAI,CAAC,GAAG,CAAC,WAAW,EAAE,IAAI,CAAC,MAAM,CAAC,EAAE,EAAE,KAAK,IAAI,EAAE;QACpF,OAAO,KAAK,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;YACxB,MAAM,GAAG,GAAG,KAAK,CAAC,KAAK,EAAG,CAAC;YAC3B,MAAM,MAAM,GAAG,MAAM,mBAAmB,CAAC,GAAG,EAAE,YAAY,CAAC,CAAC;YAC5D,OAAO,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC;QACvB,CAAC;IACH,CAAC,CAAC,CAAC;IAEH,MAAM,OAAO,CAAC,GAAG,CAAC,OAAO,CAAC,CAAC;IAE3B,OAAO,OAAO,CAAC;AACjB,CAAC;AAED;;GAEG;AACH,MAAM,CAAC,KAAK,UAAU,OAAO;IAC3B,IAAI,eAAe,EAAE,CAAC;QACpB,MAAM,eAAe,CAAC,KAAK,EAAE,CAAC;QAC9B,eAAe,GAAG,IAAI,CAAC;IACzB,CAAC;AACH,CAAC;AAoBD;;GAEG;AACH,MAAM,CAAC,KAAK,UAAU,iBAAiB,CACrC,GAAW,EACX,UAAoD,EAAE;IAEtD,IAAI,CAAC;QACH,MAAM,EAAE,GAAG,MAAM,aAAa,EAAE,CAAC;QACjC,MAAM,OAAO,GAAG,MAAM,UAAU,EAAE,CAAC;QACnC,MAAM,OAAO,GAAG,MAAM,OAAO,CAAC,UAAU,CAAC;YACvC,SAAS,EAAE,6CAA6C;SACzD,CAAC,CAAC;QACH,MAAM,IAAI,GAAG,MAAM,OAAO,CAAC,OAAO,EAAE,CAAC;QAErC,MAAM,IAAI,CAAC,IAAI,CAAC,GAAG,EAAE,EAAE,SAAS,EAAE,aAAa,EAAE,CAAC,CAAC;QAEnD,+BAA+B;QAC/B,IAAI,OAAO,CAAC,OAAO,EAAE,CAAC;YACpB,MAAM,IAAI,CAAC,cAAc,CAAC,OAAO,CAAC,OAAO,GAAG,IAAI,CAAC,CAAC;QACpD,CAAC;QAED,+BAA+B;QAC/B,MAAM,MAAM,GAAG,MAAM,IAAI,CAAC,UAAU,CAAC;YACnC,QAAQ,EAAE,OAAO,CAAC,QAAQ,IAAI,KAAK;YACnC,IAAI,EAAE,KAAK;SACZ
,CAAC,CAAC;QAEH,MAAM,OAAO,CAAC,KAAK,EAAE,CAAC;QAEtB,MAAM,MAAM,GAAG,MAAM,CAAC,QAAQ,CAAC,QAAQ,CAAC,CAAC;QAEzC,OAAO;YACL,OAAO,EAAE,IAAI;YACb,MAAM;YACN,IAAI,EAAE,cAAc,IAAI,CAAC,GAAG,EAAE,MAAM;SACrC,CAAC;IACJ,CAAC;IAAC,OAAO,KAAK,EAAE,CAAC;QACf,OAAO;YACL,OAAO,EAAE,KAAK;YACd,KAAK,EAAE,KAAK,YAAY,KAAK,CAAC,CAAC,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM,CAAC,KAAK,CAAC;SAC9D,CAAC;IACJ,CAAC;AACH,CAAC;AAED;;GAEG;AACH,MAAM,CAAC,KAAK,UAAU,WAAW,CAAC,GAAW;IAC3C,IAAI,CAAC;QACH,MAAM,EAAE,GAAG,MAAM,aAAa,EAAE,CAAC;QACjC,MAAM,OAAO,GAAG,MAAM,UAAU,EAAE,CAAC;QACnC,MAAM,OAAO,GAAG,MAAM,OAAO,CAAC,UAAU,CAAC;YACvC,SAAS,EAAE,6CAA6C;SACzD,CAAC,CAAC;QACH,MAAM,IAAI,GAAG,MAAM,OAAO,CAAC,OAAO,EAAE,CAAC;QAErC,MAAM,IAAI,CAAC,IAAI,CAAC,GAAG,EAAE,EAAE,SAAS,EAAE,aAAa,EAAE,CAAC,CAAC;QAEnD,yBAAyB;QACzB,MAAM,MAAM,GAAG,MAAM,IAAI,CAAC,GAAG,CAAC;YAC5B,MAAM,EAAE,IAAI;YACZ,eAAe,EAAE,IAAI;SACtB,CAAC,CAAC;QAEH,MAAM,OAAO,CAAC,KAAK,EAAE,CAAC;QAEtB,MAAM,MAAM,GAAG,MAAM,CAAC,QAAQ,CAAC,QAAQ,CAAC,CAAC;QAEzC,OAAO;YACL,OAAO,EAAE,IAAI;YACb,MAAM;YACN,IAAI,EAAE,UAAU,IAAI,CAAC,GAAG,EAAE,MAAM;SACjC,CAAC;IACJ,CAAC;IAAC,OAAO,KAAK,EAAE,CAAC;QACf,OAAO;YACL,OAAO,EAAE,KAAK;YACd,KAAK,EAAE,KAAK,YAAY,KAAK,CAAC,CAAC,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM,CAAC,KAAK,CAAC;SAC9D,CAAC;IACJ,CAAC;AACH,CAAC;AAYD;;GAEG;AACH,MAAM,CAAC,KAAK,UAAU,SAAS,CAC7B,GAAW,EACX,OAAiB;IAEjB,IAAI,CAAC;QACH,MAAM,EAAE,GAAG,MAAM,aAAa,EAAE,CAAC;QACjC,MAAM,OAAO,GAAG,MAAM,UAAU,EAAE,CAAC;QACnC,MAAM,OAAO,GAAG,MAAM,OAAO,CAAC,UAAU,CAAC;YACvC,SAAS,EAAE,6CAA6C;SACzD,CAAC,CAAC;QACH,MAAM,IAAI,GAAG,MAAM,OAAO,CAAC,OAAO,EAAE,CAAC;QAErC,MAAM,IAAI,CAAC,IAAI,CAAC,GAAG,EAAE,EAAE,SAAS,EAAE,kBAAkB,EAAE,CAAC,CAAC;QAExD,MAAM,OAAO,GAAc,EAAE,CAAC;QAE9B,KAAK,MAAM,MAAM,IAAI,OAAO,EAAE,CAAC;YAC7B,IAAI,CAAC;gBACH,MAAM,MAAM,GAAG,MAAM,IAAI,CAAC,QAAQ,CAAC,MAAM,CAAC,CAAC;gBAC3C,OAAO,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC;YACvB,CAAC;YAAC,OAAO,CAAC,EAAE,CAAC;gBACX,OAAO,CAAC,IAAI,CAAC,EAAE,KAAK,EAAE,CAAC,YAAY,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC;YACtE,CAAC;QACH,CAAC
;QAED,MAAM,OAAO,CAAC,KAAK,EAAE,CAAC;QAEtB,OAAO;YACL,OAAO,EAAE,IAAI;YACb,OAAO;SACR,CAAC;IACJ,CAAC;IAAC,OAAO,KAAK,EAAE,CAAC;QACf,OAAO;YACL,OAAO,EAAE,KAAK;YACd,KAAK,EAAE,KAAK,YAAY,KAAK,CAAC,CAAC,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM,CAAC,KAAK,CAAC;SAC9D,CAAC;IACJ,CAAC;AACH,CAAC;AAmBD;;GAEG;AACH,MAAM,CAAC,KAAK,UAAU,iBAAiB,CACrC,GAAW,EACX,YAAoB,EACpB,MAAyB;IAEzB,IAAI,CAAC;QACH,MAAM,EAAE,GAAG,MAAM,aAAa,EAAE,CAAC;QACjC,MAAM,OAAO,GAAG,MAAM,UAAU,EAAE,CAAC;QACnC,MAAM,OAAO,GAAG,MAAM,OAAO,CAAC,UAAU,CAAC;YACvC,SAAS,EAAE,6CAA6C;SACzD,CAAC,CAAC;QACH,MAAM,IAAI,GAAG,MAAM,OAAO,CAAC,OAAO,EAAE,CAAC;QAErC,MAAM,IAAI,CAAC,IAAI,CAAC,GAAG,EAAE,EAAE,SAAS,EAAE,kBAAkB,EAAE,CAAC,CAAC;QAExD,MAAM,IAAI,GAAG,MAAM,IAAI,CAAC,QAAQ,CAAC,CAAC,IAAuD,EAAE,EAAE;YAC3F,MAAM,EAAE,OAAO,EAAE,SAAS,EAAE,GAAG,IAAI,CAAC;YACpC,MAAM,OAAO,GAA8B,EAAE,CAAC;YAC9C,MAAM,YAAY,GAAG,QAAQ,CAAC,gBAAgB,CAAC,OAAO,CAAC,CAAC;YAExD,YAAY,CAAC,OAAO,CAAC,CAAC,EAAE,EAAE,EAAE;gBAC1B,MAAM,IAAI,GAA4B,EAAE,CAAC;gBAEzC,SAAS,CAAC,OAAO,CAAC,CAAC,KAAK,EAAE,EAAE;oBAC1B,MAAM,QAAQ,GAAI,EAAc,CAAC,aAAa,CAAC,KAAK,CAAC,QAAQ,CAAC,CAAC;oBAE/D,IAAI,QAAQ,EAAE,CAAC;wBACb,QAAQ,KAAK,CAAC,IAAI,EAAE,CAAC;4BACnB,KAAK,MAAM;gCACT,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC,GAAG,QAAQ,CAAC,WAAW,EAAE,IAAI,EAAE,IAAI,EAAE,CAAC;gCACtD,MAAM;4BACR,KAAK,MAAM;gCACT,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC,GAAG,QAAQ,CAAC,SAAS,CAAC;gCACtC,MAAM;4BACR,KAAK,WAAW;gCACd,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC,GAAG,QAAQ,CAAC,YAAY,CAAC,KAAK,CAAC,SAAS,IAAI,EAAE,CAAC,IAAI,EAAE,CAAC;gCACtE,MAAM;wBACV,CAAC;oBACH,CAAC;yBAAM,CAAC;wBACN,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC,GAAG,IAAI,CAAC;oBAC1B,CAAC;gBACH,CAAC,CAAC,CAAC;gBAEH,OAAO,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;YACrB,CAAC,CAAC,CAAC;YAEH,OAAO,OAAO,CAAC;QACjB,CAAC,EAAE,EAAE,OAAO,EAAE,YAAY,EAAE,SAAS,EAAE,MAAM,EAAE,CAAC,CAAC;QAEjD,MAAM,OAAO,CAAC,KAAK,EAAE,CAAC;QAEtB,OAAO;YACL,OAAO,EAAE,IAAI;YACb,IAAI,EAAE,IAAiC;SACxC,CAAC;IACJ,CAAC;IAAC,OAAO,KAAK,EAAE,CAAC;QACf,OAAO;YACL,OAAO,EAAE,KAAK;YACd,KAAK,EAAE,KAAK,YAAY,KAAK,CAAC,CAAC,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM,CAAC,KAAK,CAAC;SAC
9D,CAAC;IACJ,CAAC;AACH,CAAC;AAYD,0BAA0B;AAC1B,MAAM,cAAc,GAA2B;IAC7C,KAAK,EAAE,iDAAiD;IACxD,UAAU,EAAE,0DAA0D;IACtE,QAAQ,EAAE,gCAAgC;IAC1C,GAAG,EAAE,iCAAiC;IACtC,IAAI,EAAE,8BAA8B;IACpC,IAAI,EAAE,wpBAAwpB;IAC9pB,IAAI,EAAE,gEAAgE;IACtE,QAAQ,EAAE,4EAA4E;IACtF,UAAU,EAAE,mBAAmB;IAC/B,MAAM,EAAE,+BAA+B;IACvC,QAAQ,EAAE,wBAAwB;IAClC,OAAO,EAAE,2DAA2D;IACpE,QAAQ,EAAE,qCAAqC;IAC/C,SAAS,EAAE,qBAAqB;IAChC,SAAS,EAAE,oCAAoC;IAC/C,SAAS,EAAE,qCAAqC;IAChD,cAAc,EAAE,gBAAgB;IAChC,OAAO,EAAE,SAAS;IAClB,QAAQ,EAAE,0CAA0C;IACpD,IAAI,EAAE,+BAA+B;IACrC,WAAW,EAAE,0BAA0B;CACxC,CAAC;AAEF;;GAEG;AACH,MAAM,CAAC,KAAK,UAAU,YAAY,CAChC,GAAW,EACX,WAAqB,EAAE,EACvB,cAAuC;IAEvC,IAAI,CAAC;QACH,MAAM,EAAE,GAAG,MAAM,aAAa,EAAE,CAAC;QACjC,MAAM,OAAO,GAAG,MAAM,UAAU,EAAE,CAAC;QACnC,MAAM,OAAO,GAAG,MAAM,OAAO,CAAC,UAAU,CAAC;YACvC,SAAS,EAAE,6CAA6C;SACzD,CAAC,CAAC;QACH,MAAM,IAAI,GAAG,MAAM,OAAO,CAAC,OAAO,EAAE,CAAC;QAErC,MAAM,IAAI,CAAC,IAAI,CAAC,GAAG,EAAE,EAAE,SAAS,EAAE,kBAAkB,EAAE,CAAC,CAAC;QAExD,gBAAgB;QAChB,MAAM,IAAI,GAAG,MAAM,IAAI,CAAC,QAAQ,CAAC,GAAG,EAAE,CAAC,QAAQ,CAAC,IAAI,CAAC,SAAS,CAAC,CAAC;QAEhE,MAAM,OAAO,GAA6B,EAAE,CAAC;QAE7C,4BAA4B;QAC5B,KAAK,MAAM,WAAW,IAAI,QAAQ,EAAE,CAAC;YACnC,IAAI,WAAW,KAAK,KAAK,EAAE,CAAC;gBAC1B,4BAA4B;gBAC5B,KAAK,MAAM,CAAC,IAAI,EAAE,KAAK,CAAC,IAAI,MAAM,CAAC,OAAO,CAAC,cAAc,CAAC,EAAE,CAAC;oBAC3D,MAAM,KAAK,GAAG,IAAI,CAAC,KAAK,CAAC,KAAK,CAAoB,CAAC;oBACnD,IAAI,KAAK,EAAE,CAAC;wBACV,OAAO,CAAC,IAAI,CAAC,GAAG,CAAC,GAAG,IAAI,GAAG,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,cAAc;oBACrD,CAAC;gBACH,CAAC;YACH,CAAC;iBAAM,IAAI,cAAc,CAAC,WAAW,CAAC,EAAE,CAAC;gBACvC,MAAM,KAAK,GAAG,IAAI,CAAC,KAAK,CAAC,cAAc,CAAC,WAAW,CAAC,CAAoB,CAAC;gBACzE,IAAI,KAAK,EAAE,CAAC;oBACV,OAAO,CAAC,WAAW,CAAC,GAAG,CAAC,GAAG,IAAI,GAAG,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,cAAc;gBAC5D,CAAC;YACH,CAAC;QACH,CAAC;QAED,0BAA0B;QAC1B,IAAI,cAAc,EAAE,CAAC;YACnB,KAAK,MAAM,CAAC,IAAI,EAAE,OAAO,CAAC,IAAI,MAAM,CAAC,OAAO,CAAC,cAAc,CAAC,EAAE,CAAC;gBAC7D,IAAI,CAAC;oBACH,MAAM,KAAK,GAAG,IAAI,MAAM,CAAC,OAAO,EAAE,GAAG,CAAC,CAAC;oBACvC,MAAM,KAAK,GAAG,IAAI,CAAC,KAAK,CAAC,KAAK,CAAoB,CAA
C;oBACnD,IAAI,KAAK,EAAE,CAAC;wBACV,OAAO,CAAC,IAAI,CAAC,GAAG,CAAC,GAAG,IAAI,GAAG,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,cAAc;oBACrD,CAAC;gBACH,CAAC;gBAAC,MAAM,CAAC;oBACP,8BAA8B;gBAChC,CAAC;YACH,CAAC;QACH,CAAC;QAED,MAAM,OAAO,CAAC,KAAK,EAAE,CAAC;QAEtB,OAAO;YACL,OAAO,EAAE,IAAI;YACb,OAAO;SACR,CAAC;IACJ,CAAC;IAAC,OAAO,KAAK,EAAE,CAAC;QACf,OAAO;YACL,OAAO,EAAE,KAAK;YACd,KAAK,EAAE,KAAK,YAAY,KAAK,CAAC,CAAC,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM,CAAC,KAAK,CAAC;SAC9D,CAAC;IACJ,CAAC;AACH,CAAC"}
|
|
@@ -0,0 +1,58 @@
|
|
|
1
|
+
/**
 * A piece of text handed to the rankers declared in this file
 * (`BM25Ranker.indexDocuments`, `BM25Ranker.rank`, `HybridRanker.rank`).
 *
 * - `id`: string identifier of the document.
 * - `content`: the document text as a string.
 * - `metadata`: optional bag of arbitrary extra values keyed by string.
 *   NOTE(review): which metadata keys the rankers read is not visible in this
 *   declaration file; check src/ranking.ts.
 */
export interface Document {
|
|
2
|
+
id: string;
|
|
3
|
+
content: string;
|
|
4
|
+
metadata?: Record<string, unknown>;
|
|
5
|
+
}
|
|
6
|
+
/**
 * Element type of the arrays returned by the `rank` methods of `BM25Ranker`
 * and `HybridRanker`.
 *
 * - `doc`: the `Document` this entry refers to.
 * - `score`: numeric score assigned to `doc`.
 * - `breakdown`: optional per-component numbers named `bm25`, `freshness` and
 *   `domain`.
 *   NOTE(review): presumably these are the component scores that were combined
 *   into `score`; how they are combined, and which ranker fills this field,
 *   is not shown here. Confirm in src/ranking.ts.
 */
export interface RankedDocument {
|
|
7
|
+
doc: Document;
|
|
8
|
+
score: number;
|
|
9
|
+
breakdown?: {
|
|
10
|
+
bm25: number;
|
|
11
|
+
freshness: number;
|
|
12
|
+
domain: number;
|
|
13
|
+
};
|
|
14
|
+
}
|
|
15
|
+
/**
 * Numeric configuration accepted (as a `Partial`) by the `HybridRanker`
 * constructor: three numbers named `alpha`, `beta` and `gamma`.
 *
 * NOTE(review): presumably these are the weights applied to the three score
 * components listed in `RankedDocument.breakdown` (`bm25`, `freshness`,
 * `domain`), but neither the mapping, the defaults, nor any valid range is
 * visible in this declaration file. Verify against src/ranking.ts.
 */
export interface HybridScoreConfig {
|
|
16
|
+
alpha: number;
|
|
17
|
+
beta: number;
|
|
18
|
+
gamma: number;
|
|
19
|
+
}
|
|
20
|
+
/**
 * Element type of the arrays returned by `rankSearchResults` (on both
 * rankers) and by `rerankWithFullContent`, which also takes it as input.
 *
 * - `position`: numeric position of the result.
 *   NOTE(review): whether this is 0- or 1-based is not visible here.
 * - `score`: numeric score of the result.
 * - `title`, `url`, `snippet`: strings describing the result.
 * - `provider`: which search provider the result is attributed to; one of
 *   `'duckduckgo'` or `'google'`.
 * - `domain`: a string; both rankers declare a private `extractDomain`
 *   member, so presumably it is derived from `url` (TODO confirm).
 * - `publishedDate`: optional string; its format is not specified in this
 *   declaration.
 */
export interface SearchResult {
|
|
21
|
+
position: number;
|
|
22
|
+
score: number;
|
|
23
|
+
title: string;
|
|
24
|
+
url: string;
|
|
25
|
+
snippet: string;
|
|
26
|
+
provider: 'duckduckgo' | 'google';
|
|
27
|
+
domain: string;
|
|
28
|
+
publishedDate?: string;
|
|
29
|
+
}
|
|
30
|
+
/**
 * Input element type for `indexSearchResults` and `rankSearchResults`:
 * a search hit before ranking.
 *
 * - `title`, `url`, `description`: strings describing the hit.
 * - `provider`: which search provider the hit is attributed to; one of
 *   `'google'` or `'duckduckgo'`.
 *
 * Unlike `SearchResult`, this shape declares no `position`, `score`,
 * `domain` or `publishedDate`, and uses `description` where `SearchResult`
 * has `snippet`.
 */
export interface RawSearchResult {
|
|
31
|
+
title: string;
|
|
32
|
+
url: string;
|
|
33
|
+
description: string;
|
|
34
|
+
provider: 'google' | 'duckduckgo';
|
|
35
|
+
}
|
|
36
|
+
/**
 * Ranker class exposing indexing and ranking over `Document`s and
 * `RawSearchResult`s.
 *
 * Public surface declared here:
 * - `constructor(k1?, b?)`: both numeric arguments are optional.
 *   NOTE(review): presumably the usual BM25 `k1` / `b` tuning parameters;
 *   their default values are not visible in this declaration file.
 * - `indexDocuments(documents)` / `indexSearchResults(results)`: take an
 *   array and return `void`.
 * - `rank(query, documents)`: returns `RankedDocument[]`.
 * - `rankSearchResults(query, results)`: returns `SearchResult[]`.
 *
 * NOTE(review): whether `rank` / `rankSearchResults` require a prior
 * `index*` call, or index their argument themselves, cannot be told from
 * the declaration. Check src/ranking.ts.
 *
 * The private members (`bm25`, `k1`, `b`, `extractDomain`) have their types
 * erased in the emitted declaration.
 */
export declare class BM25Ranker {
|
|
37
|
+
private bm25;
|
|
38
|
+
private k1;
|
|
39
|
+
private b;
|
|
40
|
+
constructor(k1?: number, b?: number);
|
|
41
|
+
indexDocuments(documents: Document[]): void;
|
|
42
|
+
indexSearchResults(results: RawSearchResult[]): void;
|
|
43
|
+
rank(query: string, documents: Document[]): RankedDocument[];
|
|
44
|
+
rankSearchResults(query: string, results: RawSearchResult[]): SearchResult[];
|
|
45
|
+
private extractDomain;
|
|
46
|
+
}
|
|
47
|
+
/**
 * Ranker class with the same `rank` / `rankSearchResults` signatures as
 * `BM25Ranker`, configured through an optional partial `HybridScoreConfig`.
 *
 * Public surface declared here:
 * - `constructor(config?)`: any subset of `alpha`, `beta`, `gamma` may be
 *   supplied. NOTE(review): the defaults for omitted fields are not visible
 *   in this declaration file.
 * - `rank(query, documents)`: returns `RankedDocument[]`.
 * - `rankSearchResults(query, results)`: returns `SearchResult[]`.
 *
 * The private members are `bm25Ranker`, `config`, `calculateFreshnessScore`,
 * `calculateDomainScore` and `extractDomain`; their types are erased in the
 * emitted declaration.
 * NOTE(review): the member names suggest a BM25 score is blended with
 * freshness and domain scores, but the formula lives in src/ranking.ts and
 * should be confirmed there.
 */
export declare class HybridRanker {
|
|
48
|
+
private bm25Ranker;
|
|
49
|
+
private config;
|
|
50
|
+
constructor(config?: Partial<HybridScoreConfig>);
|
|
51
|
+
rank(query: string, documents: Document[]): RankedDocument[];
|
|
52
|
+
rankSearchResults(query: string, results: RawSearchResult[]): SearchResult[];
|
|
53
|
+
private calculateFreshnessScore;
|
|
54
|
+
private calculateDomainScore;
|
|
55
|
+
private extractDomain;
|
|
56
|
+
}
|
|
57
|
+
/**
 * Takes a query, an array of `SearchResult`s and a string-to-string map of
 * extracted content, and returns an array of `SearchResult`s.
 *
 * NOTE(review): presumably `extractedContent` is keyed by result URL and the
 * returned array is the input re-scored and re-ordered against the full page
 * text; neither the key, the handling of results missing from the map, nor
 * whether `results` is mutated is visible in this declaration. Confirm in
 * src/ranking.ts.
 */
export declare function rerankWithFullContent(query: string, results: SearchResult[], extractedContent: Map<string, string>): SearchResult[];
|
|
58
|
+
//# sourceMappingURL=ranking.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"ranking.d.ts","sourceRoot":"","sources":["../src/ranking.ts"],"names":[],"mappings":"AAOA,MAAM,WAAW,QAAQ;IACvB,EAAE,EAAE,MAAM,CAAC;IACX,OAAO,EAAE,MAAM,CAAC;IAChB,QAAQ,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,CAAC;CACpC;AAED,MAAM,WAAW,cAAc;IAC7B,GAAG,EAAE,QAAQ,CAAC;IACd,KAAK,EAAE,MAAM,CAAC;IACd,SAAS,CAAC,EAAE;QACV,IAAI,EAAE,MAAM,CAAC;QACb,SAAS,EAAE,MAAM,CAAC;QAClB,MAAM,EAAE,MAAM,CAAC;KAChB,CAAC;CACH;AAED,MAAM,WAAW,iBAAiB;IAChC,KAAK,EAAE,MAAM,CAAC;IACd,IAAI,EAAE,MAAM,CAAC;IACb,KAAK,EAAE,MAAM,CAAC;CACf;AAED,MAAM,WAAW,YAAY;IAC3B,QAAQ,EAAE,MAAM,CAAC;IACjB,KAAK,EAAE,MAAM,CAAC;IACd,KAAK,EAAE,MAAM,CAAC;IACd,GAAG,EAAE,MAAM,CAAC;IACZ,OAAO,EAAE,MAAM,CAAC;IAChB,QAAQ,EAAE,YAAY,GAAG,QAAQ,CAAC;IAClC,MAAM,EAAE,MAAM,CAAC;IACf,aAAa,CAAC,EAAE,MAAM,CAAC;CACxB;AAED,MAAM,WAAW,eAAe;IAC9B,KAAK,EAAE,MAAM,CAAC;IACd,GAAG,EAAE,MAAM,CAAC;IACZ,WAAW,EAAE,MAAM,CAAC;IACpB,QAAQ,EAAE,QAAQ,GAAG,YAAY,CAAC;CACnC;AAMD,qBAAa,UAAU;IACrB,OAAO,CAAC,IAAI,CAAqB;IACjC,OAAO,CAAC,EAAE,CAAe;IACzB,OAAO,CAAC,CAAC,CAAgB;gBAEb,EAAE,CAAC,EAAE,MAAM,EAAE,CAAC,CAAC,EAAE,MAAM;IAKnC,cAAc,CAAC,SAAS,EAAE,QAAQ,EAAE,GAAG,IAAI;IAM3C,kBAAkB,CAAC,OAAO,EAAE,eAAe,EAAE,GAAG,IAAI;IASpD,IAAI,CAAC,KAAK,EAAE,MAAM,EAAE,SAAS,EAAE,QAAQ,EAAE,GAAG,cAAc,EAAE;IAe5D,iBAAiB,CAAC,KAAK,EAAE,MAAM,EAAE,OAAO,EAAE,eAAe,EAAE,GAAG,YAAY,EAAE;IAsB5E,OAAO,CAAC,aAAa;CAOtB;AAMD,qBAAa,YAAY;IACvB,OAAO,CAAC,UAAU,CAAa;IAC/B,OAAO,CAAC,MAAM,CAAoB;gBAEtB,MAAM,CAAC,EAAE,OAAO,CAAC,iBAAiB,CAAC;IAU/C,IAAI,CAAC,KAAK,EAAE,MAAM,EAAE,SAAS,EAAE,QAAQ,EAAE,GAAG,cAAc,EAAE;IAuB5D,iBAAiB,CAAC,KAAK,EAAE,MAAM,EAAE,OAAO,EAAE,eAAe,EAAE,GAAG,YAAY,EAAE;IAqB5E,OAAO,CAAC,uBAAuB;IAgB/B,OAAO,CAAC,oBAAoB;IAkD5B,OAAO,CAAC,aAAa;CAOtB;AAMD,wBAAgB,qBAAqB,CACnC,KAAK,EAAE,MAAM,EACb,OAAO,EAAE,YAAY,EAAE,EACvB,gBAAgB,EAAE,GAAG,CAAC,MAAM,EAAE,MAAM,CAAC,GACpC,YAAY,EAAE,CAiChB"}
|