@ignidor/web-search-mcp 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,92 @@
1
/** Outcome of a single `crawlWithPlaywright` call (also the element type returned by `batchCrawl`). */
+ export interface PlaywrightCrawlResult {
2
/** `true` when the crawl ran to completion; `false` when an error was caught (see `error`). */
+ success: boolean;
3
/** The URL that was requested. Populated on success. */
+ url?: string;
4
/** Markdown rendering of the page. Populated on success. */
+ markdown?: string;
5
/** The page's `document.title`. Populated on success. */
+ title?: string;
6
/** Count derived from splitting `markdown` on whitespace. Populated on success. */
+ wordCount?: number;
7
/** Number of `a[href]` anchors on the page, split into internal and external. Populated on success. */
+ links?: {
8
+ internal: number;
9
+ external: number;
10
+ };
11
/** Image `src` values (excluding `data:` URIs). Only present when `includeImages` was requested. */
+ images?: string[];
12
/** Message of the caught error. Populated on failure. */
+ error?: string;
13
/** Elapsed wall-clock time in milliseconds; set on both success and failure. */
+ durationMs?: number;
14
+ }
15
/** Optional settings accepted by `crawlWithPlaywright` (and, combined with `concurrency`, by `batchCrawl`). */
+ export interface PlaywrightCrawlOptions {
16
/** Timeout in milliseconds for navigation and selector waits; the implementation falls back to 30000. */
+ timeout?: number;
17
/** Selector to wait for after navigation, before content is extracted. */
+ waitForSelector?: string;
18
/** When truthy, image URLs are collected and returned in `images`. */
+ includeImages?: boolean;
19
/** JavaScript source evaluated in the page before content is extracted. */
+ executeJs?: string;
20
/** When truthy, a PNG named `screenshot-<timestamp>.png` is saved (relative path) during the crawl. */
+ screenshot?: boolean;
21
+ }
22
+ /**
23
+ * Crawls a URL using Playwright and returns markdown content
 *
 * Errors are caught and reported through the result (`success: false`, `error`)
 * rather than by rejecting the returned promise.
 *
 * @param url - Page to load.
 * @param options - Optional crawl settings; defaults to `{}`.
 * @returns The crawl outcome, including `durationMs` on both success and failure.
24
+ */
25
+ export declare function crawlWithPlaywright(url: string, options?: PlaywrightCrawlOptions): Promise<PlaywrightCrawlResult>;
26
+ /**
27
+ * Batch crawl multiple URLs with concurrency control
 *
 * Each URL is crawled with `crawlWithPlaywright`; per-URL failures appear as
 * results with `success: false`.
 *
 * @param urls - URLs to crawl.
 * @param options - Crawl options applied to every URL, plus `concurrency`
 *   (number of parallel workers; the implementation defaults it to 3).
 * @returns One result per crawled URL.
28
+ */
29
+ export declare function batchCrawl(urls: string[], options?: PlaywrightCrawlOptions & {
30
+ concurrency?: number;
31
+ }): Promise<PlaywrightCrawlResult[]>;
32
+ /**
33
+ * Clean up resources
 *
 * Closes the cached browser instance, if one was launched, and clears the
 * cache so a later call launches a new browser.
34
+ */
35
+ export declare function cleanup(): Promise<void>;
36
/** Outcome of `captureScreenshot`. */
+ export interface ScreenshotResult {
37
/** `true` when the screenshot was captured; `false` when an error was caught. */
+ success: boolean;
38
/** Base64-encoded PNG data. Populated on success. */
+ base64?: string;
39
/**
 * Generated name of the form `screenshot-<timestamp>.png`.
 * NOTE(review): the visible implementation captures to a buffer and does not write this file.
 */
+ path?: string;
40
/** Message of the caught error. Populated on failure. */
+ error?: string;
41
+ }
42
/** Outcome of `generatePDF`. */
+ export interface PDFResult {
43
/** `true` when the PDF was generated; `false` when an error was caught. */
+ success: boolean;
44
/** Base64-encoded PDF data. Populated on success. */
+ base64?: string;
45
/**
 * Generated name of the form `output-<timestamp>.pdf`.
 * NOTE(review): the visible implementation renders to a buffer and does not write this file.
 */
+ path?: string;
46
/** Message of the caught error. Populated on failure. */
+ error?: string;
47
+ }
48
+ /**
49
+ * Capture a screenshot of a webpage
 *
 * Errors are caught and reported through the result instead of rejecting.
 *
 * @param url - Page to load (navigation waits for network idle).
 * @param options - `waitFor`: extra delay in seconds after load (multiplied by
 *   1000 before being passed to the page); `fullPage`: capture the full
 *   scrollable page instead of the viewport (defaults to `false`).
 * @returns The PNG as base64 on success.
50
+ */
51
+ export declare function captureScreenshot(url: string, options?: {
52
+ waitFor?: number;
53
+ fullPage?: boolean;
54
+ }): Promise<ScreenshotResult>;
55
+ /**
56
+ * Generate a PDF from a webpage
 *
 * The implementation renders in A4 format with backgrounds printed. Errors are
 * caught and reported through the result instead of rejecting.
 *
 * @param url - Page to load (navigation waits for network idle).
 * @returns The PDF as base64 on success.
57
+ */
58
+ export declare function generatePDF(url: string): Promise<PDFResult>;
59
/** Outcome of `executeJS`. */
+ export interface JSExecutionResult {
60
/** `true` when the page loaded and all scripts were attempted; `false` when setup or navigation failed. */
+ success: boolean;
61
/** One entry per script, in input order; a script that throws contributes `{ error: <message> }`. */
+ results?: unknown[];
62
/** Message of the caught error. Populated on failure. */
+ error?: string;
63
+ }
64
+ /**
65
+ * Execute custom JavaScript on a webpage
 *
 * Scripts are evaluated one after another in the same page. A failing script
 * does not stop the remaining ones.
 *
 * @param url - Page to load before evaluating the scripts.
 * @param scripts - JavaScript sources to evaluate in the page.
 * @returns Per-script results; see `JSExecutionResult.results`.
66
+ */
67
+ export declare function executeJS(url: string, scripts: string[]): Promise<JSExecutionResult>;
68
/** Describes one value to pull out of each matched base element in `extractStructured`. */
+ export interface StructuredField {
69
/** Key under which the extracted value is stored in each output record. */
+ name: string;
70
/** Selector resolved with `querySelector` relative to the base element. */
+ selector: string;
71
/** What to read from the matched element: trimmed text content, inner HTML, or an attribute value. */
+ type: 'text' | 'html' | 'attribute';
72
/** Attribute name to read when `type` is `'attribute'`. */
+ attribute?: string;
73
+ }
74
/** Outcome of `extractStructured`. */
+ export interface StructuredExtractionResult {
75
/** `true` when extraction ran; `false` when an error was caught. */
+ success: boolean;
76
/** One record per element matched by the base selector; a field whose selector matches nothing is `null`. */
+ data?: Record<string, unknown>[];
77
/** Message of the caught error. Populated on failure. */
+ error?: string;
78
+ }
79
+ /**
80
+ * Extract structured data using CSS selectors
 *
 * @param url - Page to load.
 * @param baseSelector - Selector matching each repeated item on the page.
 * @param fields - Values to extract from every matched item.
 * @returns One record per matched item; errors are reported in the result.
81
+ */
82
+ export declare function extractStructured(url: string, baseSelector: string, fields: StructuredField[]): Promise<StructuredExtractionResult>;
83
/** Outcome of `extractRegex`. */
+ export interface RegexExtractionResult {
84
/** `true` when extraction ran; `false` when an error was caught. */
+ success: boolean;
85
/** Deduplicated matches keyed by pattern name; patterns with no match are omitted. */
+ matches?: Record<string, string[]>;
86
/** Message of the caught error. Populated on failure. */
+ error?: string;
87
+ }
88
+ /**
89
+ * Extract data using regex patterns
 *
 * Patterns are matched against the page body's `innerText`.
 *
 * @param url - Page to load.
 * @param patterns - Names of built-in patterns to run, or `'all'` to run every
 *   built-in pattern; unknown names are ignored. Defaults to `[]`.
 * @param customPatterns - Map of result key to regex source; each is compiled
 *   with the `g` flag and invalid sources are skipped.
 * @returns Matches keyed by pattern name; errors are reported in the result.
90
+ */
91
+ export declare function extractRegex(url: string, patterns?: string[], customPatterns?: Record<string, string>): Promise<RegexExtractionResult>;
92
+ //# sourceMappingURL=playwright-crawler.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"playwright-crawler.d.ts","sourceRoot":"","sources":["../src/playwright-crawler.ts"],"names":[],"mappings":"AAQA,MAAM,WAAW,qBAAqB;IACpC,OAAO,EAAE,OAAO,CAAC;IACjB,GAAG,CAAC,EAAE,MAAM,CAAC;IACb,QAAQ,CAAC,EAAE,MAAM,CAAC;IAClB,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,KAAK,CAAC,EAAE;QAAE,QAAQ,EAAE,MAAM,CAAC;QAAC,QAAQ,EAAE,MAAM,CAAA;KAAE,CAAC;IAC/C,MAAM,CAAC,EAAE,MAAM,EAAE,CAAC;IAClB,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,UAAU,CAAC,EAAE,MAAM,CAAC;CACrB;AAED,MAAM,WAAW,sBAAsB;IACrC,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,eAAe,CAAC,EAAE,MAAM,CAAC;IACzB,aAAa,CAAC,EAAE,OAAO,CAAC;IACxB,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,UAAU,CAAC,EAAE,OAAO,CAAC;CACtB;AAiCD;;GAEG;AACH,wBAAsB,mBAAmB,CACvC,GAAG,EAAE,MAAM,EACX,OAAO,GAAE,sBAA2B,GACnC,OAAO,CAAC,qBAAqB,CAAC,CAwGhC;AA8ED;;GAEG;AACH,wBAAsB,UAAU,CAC9B,IAAI,EAAE,MAAM,EAAE,EACd,OAAO,GAAE,sBAAsB,GAAG;IAAE,WAAW,CAAC,EAAE,MAAM,CAAA;CAAO,GAC9D,OAAO,CAAC,qBAAqB,EAAE,CAAC,CAiBlC;AAED;;GAEG;AACH,wBAAsB,OAAO,kBAK5B;AAMD,MAAM,WAAW,gBAAgB;IAC/B,OAAO,EAAE,OAAO,CAAC;IACjB,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,IAAI,CAAC,EAAE,MAAM,CAAC;IACd,KAAK,CAAC,EAAE,MAAM,CAAC;CAChB;AAED,MAAM,WAAW,SAAS;IACxB,OAAO,EAAE,OAAO,CAAC;IACjB,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,IAAI,CAAC,EAAE,MAAM,CAAC;IACd,KAAK,CAAC,EAAE,MAAM,CAAC;CAChB;AAED;;GAEG;AACH,wBAAsB,iBAAiB,CACrC,GAAG,EAAE,MAAM,EACX,OAAO,GAAE;IAAE,OAAO,CAAC,EAAE,MAAM,CAAC;IAAC,QAAQ,CAAC,EAAE,OAAO,CAAA;CAAO,GACrD,OAAO,CAAC,gBAAgB,CAAC,CAqC3B;AAED;;GAEG;AACH,wBAAsB,WAAW,CAAC,GAAG,EAAE,MAAM,GAAG,OAAO,CAAC,SAAS,CAAC,CAgCjE;AAMD,MAAM,WAAW,iBAAiB;IAChC,OAAO,EAAE,OAAO,CAAC;IACjB,OAAO,CAAC,EAAE,OAAO,EAAE,CAAC;IACpB,KAAK,CAAC,EAAE,MAAM,CAAC;CAChB;AAED;;GAEG;AACH,wBAAsB,SAAS,CAC7B,GAAG,EAAE,MAAM,EACX,OAAO,EAAE,MAAM,EAAE,GAChB,OAAO,CAAC,iBAAiB,CAAC,CAkC5B;AAMD,MAAM,WAAW,eAAe;IAC9B,IAAI,EAAE,MAAM,CAAC;IACb,QAAQ,EAAE,MAAM,CAAC;IACjB,IAAI,EAAE,MAAM,GAAG,MAAM,GAAG,WAAW,CAAC;IACpC,SAAS,CAAC,EAAE,MAAM,CAAC;CACpB;AAED,MAAM,WAAW,0BAA0B;IACzC,OAAO,EAAE,OAAO,CAAC;IACjB,IAAI,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,EAAE,CAAC;IAC
jC,KAAK,CAAC,EAAE,MAAM,CAAC;CAChB;AAED;;GAEG;AACH,wBAAsB,iBAAiB,CACrC,GAAG,EAAE,MAAM,EACX,YAAY,EAAE,MAAM,EACpB,MAAM,EAAE,eAAe,EAAE,GACxB,OAAO,CAAC,0BAA0B,CAAC,CAyDrC;AAMD,MAAM,WAAW,qBAAqB;IACpC,OAAO,EAAE,OAAO,CAAC;IACjB,OAAO,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,EAAE,CAAC,CAAC;IACnC,KAAK,CAAC,EAAE,MAAM,CAAC;CAChB;AA2BD;;GAEG;AACH,wBAAsB,YAAY,CAChC,GAAG,EAAE,MAAM,EACX,QAAQ,GAAE,MAAM,EAAO,EACvB,cAAc,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,GACtC,OAAO,CAAC,qBAAqB,CAAC,CA6DhC"}
@@ -0,0 +1,454 @@
1
+ // Direct Playwright Crawler - No Docker Required!
2
+ import * as cheerio from 'cheerio';
3
+ // ============================================================================
4
+ // Playwright Crawler
5
+ // ============================================================================
6
+ let playwrightInstance = null;
7
+ let browserInstance = null;
8
/**
 * Lazily loads the optional `playwright` module and caches it in
 * `playwrightInstance`, so the dynamic import is attempted only until it
 * succeeds once.
 *
 * @returns The imported `playwright` module namespace.
 * @throws Error with the install hint when the import fails. The underlying
 *   import error is attached as `cause` so failures other than a missing
 *   package are not hidden.
 */
async function getPlaywright() {
    if (playwrightInstance) {
        return playwrightInstance;
    }
    try {
        // Dynamic import keeps playwright an optional dependency.
        playwrightInstance = await import('playwright');
    }
    catch (error) {
        // The message stays unchanged for callers that display or match it;
        // the original failure travels along as `cause`.
        throw Object.assign(new Error('Playwright not installed. Install with: npm install playwright'), { cause: error });
    }
    return playwrightInstance;
}
20
// In-flight launch shared by concurrent callers; null when no launch is pending.
let browserLaunchPromise = null;
/**
 * Returns the shared headless Chromium browser, launching it on first use.
 *
 * Concurrent callers share one pending launch, so parallel crawls do not each
 * start (and leak) their own browser. A cached browser that is no longer
 * connected is replaced by a fresh launch.
 *
 * @returns The connected browser instance.
 * @throws Whatever `getPlaywright` or `chromium.launch` throws; a failed
 *   launch is not cached, so the next call retries.
 */
async function getBrowser() {
    if (browserInstance && browserInstance.isConnected()) {
        return browserInstance;
    }
    if (!browserLaunchPromise) {
        browserLaunchPromise = (async () => {
            const pw = await getPlaywright();
            browserInstance = await pw.chromium.launch({
                headless: true,
                args: ['--no-sandbox', '--disable-setuid-sandbox']
            });
            return browserInstance;
        })().finally(() => {
            // Clear the marker on success or failure so later calls either hit
            // the cached instance or start a new launch.
            browserLaunchPromise = null;
        });
    }
    return browserLaunchPromise;
}
30
/**
 * Crawls a URL using Playwright and returns markdown content.
 *
 * Opens a fresh browser context, navigates, optionally waits for a selector
 * and runs caller-supplied JavaScript, then converts the rendered HTML to
 * markdown. Never rejects: any failure is reported as `{ success: false }`.
 * The browser context is always closed, including on failure.
 *
 * @param url - Page to load.
 * @param options - Optional settings: `timeout` (ms, default 30000),
 *   `waitForSelector`, `executeJs`, `includeImages`, `screenshot`.
 * @returns On success: url, title, markdown, wordCount, link counts, optional
 *   images and durationMs. On failure: error message and durationMs.
 */
export async function crawlWithPlaywright(url, options = {}) {
    const startTime = Date.now();
    let context = null;
    try {
        const browser = await getBrowser();
        context = await browser.newContext({
            userAgent: 'Mozilla/5.0 (compatible; ClaudeMCP-Bot/1.0)'
        });
        const page = await context.newPage();
        // `??` keeps an explicit 0 instead of replacing it with the default.
        const timeout = options.timeout ?? 30000;
        page.setDefaultTimeout(timeout);
        await page.goto(url, { waitUntil: 'domcontentloaded', timeout });
        if (options.waitForSelector) {
            await page.waitForSelector(options.waitForSelector, { timeout });
        }
        if (options.executeJs) {
            await page.evaluate(options.executeJs);
        }
        // Snapshot the rendered document after any custom script has run.
        const content = await page.evaluate(() => ({
            title: document.title,
            html: document.documentElement.outerHTML
        }));
        // Classify links by resolved origin so relative hrefs ("page.html",
        // "#top") count as internal and protocol-relative hrefs to other
        // hosts ("//cdn.example.com/x") count as external.
        const links = await page.evaluate(() => {
            let internal = 0;
            let external = 0;
            for (const anchor of Array.from(document.querySelectorAll('a[href]'))) {
                const href = anchor.getAttribute('href');
                if (!href) {
                    continue;
                }
                let sameOrigin = false;
                try {
                    sameOrigin = new URL(href, document.baseURI).origin === window.location.origin;
                }
                catch {
                    // An unparsable href cannot point at this origin.
                    sameOrigin = false;
                }
                if (sameOrigin) {
                    internal++;
                }
                else {
                    external++;
                }
            }
            return { internal, external };
        });
        let images = [];
        if (options.includeImages) {
            images = await page.evaluate(() => {
                const imgs = Array.from(document.querySelectorAll('img[src]'));
                return imgs.map((img) => img.src).filter((src) => src && !src.startsWith('data:'));
            });
        }
        const $ = cheerio.load(content.html);
        // Drop non-content elements before conversion.
        $('script, style, nav, footer, iframe').remove();
        const markdown = htmlToMarkdown($, content.title);
        if (options.screenshot) {
            await page.screenshot({ path: `screenshot-${Date.now()}.png`, fullPage: false });
        }
        return {
            success: true,
            url,
            title: content.title,
            markdown,
            // Drop the empty strings produced by leading/trailing whitespace.
            wordCount: markdown.split(/\s+/).filter(Boolean).length,
            links,
            images: options.includeImages ? images : undefined,
            durationMs: Date.now() - startTime
        };
    }
    catch (error) {
        return {
            success: false,
            error: error instanceof Error ? error.message : String(error),
            durationMs: Date.now() - startTime
        };
    }
    finally {
        // Always release the context; a close failure must not mask the result.
        if (context) {
            await context.close().catch(() => { });
        }
    }
}
121
/**
 * Converts HTML to markdown format.
 *
 * Output is grouped by element type rather than document order: a `# title`
 * header, then all headings, paragraphs, lists, code blocks, links, bold and
 * italic text. If none of those elements exist, the body text is emitted as
 * paragraphs split on blank lines.
 *
 * @param $ - Cheerio-style selector function bound to the loaded document.
 * @param title - Page title used for the leading `#` heading.
 * @returns The markdown text.
 */
function htmlToMarkdown($, title) {
    const header = `# ${title}\n\n`;
    let markdown = header;
    $('h1, h2, h3, h4, h5, h6').each((_, el) => {
        // tagName is "h1".."h6"; the digit is the heading depth.
        const level = parseInt(el.tagName.charAt(1), 10);
        const text = $(el).text().trim();
        markdown += `${'#'.repeat(level)} ${text}\n\n`;
    });
    $('p').each((_, el) => {
        const text = $(el).text().trim();
        if (text) {
            markdown += `${text}\n\n`;
        }
    });
    $('ul, ol').each((_, el) => {
        const isOrdered = el.tagName === 'ol';
        // Use the element handed to the callback instead of re-querying the
        // list for every item.
        $(el).find('li').each((index, li) => {
            const text = $(li).text().trim();
            const prefix = isOrdered ? `${index + 1}.` : '-';
            markdown += `${prefix} ${text}\n`;
        });
        markdown += '\n';
    });
    $('pre').each((_, el) => {
        // Prefer the nested <code>; a bare <pre> falls back to its own text
        // so its content is not emitted as an empty fence.
        const codeEl = $(el).find('code');
        const code = (codeEl.length > 0 ? codeEl : $(el)).text().trim();
        markdown += `\`\`\`\n${code}\n\`\`\`\n\n`;
    });
    $('a[href]').each((_, el) => {
        const href = $(el).attr('href');
        const text = $(el).text().trim();
        if (href && text) {
            markdown += `[${text}](${href})`;
        }
    });
    $('strong, b').each((_, el) => {
        const text = $(el).text().trim();
        markdown += `**${text}**`;
    });
    $('em, i').each((_, el) => {
        const text = $(el).text().trim();
        markdown += `*${text}*`;
    });
    // Nothing structured was found: fall back to the raw body text.
    if (markdown === header) {
        const bodyText = $('body').text().trim();
        for (const paragraph of bodyText.split(/\n\n+/)) {
            const trimmed = paragraph.trim();
            if (trimmed) {
                markdown += `${trimmed}\n\n`;
            }
        }
    }
    return markdown;
}
186
/**
 * Batch crawl multiple URLs with concurrency control.
 *
 * Runs up to `concurrency` crawls in parallel. `results[i]` always
 * corresponds to `urls[i]`, so a failed crawl (which carries no `url`) can
 * still be matched to its input.
 *
 * @param urls - URLs to crawl.
 * @param options - Crawl options applied to every URL, plus `concurrency`
 *   (default 3; values below 1 or non-numeric values are treated as 1).
 * @returns One result per input URL, in input order.
 */
export async function batchCrawl(urls, options = {}) {
    const { concurrency = 3, ...crawlOptions } = options;
    // Snapshot the input so caller mutations during the crawl have no effect.
    const pending = [...urls];
    const results = new Array(pending.length);
    // At least one worker, so an invalid concurrency cannot skip all URLs.
    const workerCount = Math.min(Math.max(1, Math.floor(concurrency) || 1), pending.length);
    let nextIndex = 0;
    const runWorker = async () => {
        // The index is claimed synchronously, so no two workers share a URL.
        while (nextIndex < pending.length) {
            const index = nextIndex++;
            results[index] = await crawlWithPlaywright(pending[index], crawlOptions);
        }
    };
    await Promise.all(Array.from({ length: workerCount }, runWorker));
    return results;
}
203
/**
 * Clean up resources.
 *
 * Closes the shared browser if one is cached. The cache is cleared before
 * closing, so a failed `close()` cannot leave a stale instance behind and
 * later callers always launch a fresh browser.
 *
 * @returns Resolves once the browser has been closed (or immediately when
 *   none was cached).
 */
export async function cleanup() {
    const browser = browserInstance;
    if (!browser) {
        return;
    }
    browserInstance = null;
    await browser.close();
}
212
/**
 * Capture a screenshot of a webpage.
 *
 * Never rejects: failures are reported as `{ success: false, error }`. The
 * browser context is always closed, including on failure.
 *
 * @param url - Page to load; navigation waits for network idle.
 * @param options - `waitFor`: extra delay in seconds after load; `fullPage`:
 *   capture the whole scrollable page (default `false`).
 * @returns The PNG as base64 plus a generated file name. The image is only
 *   returned in memory; nothing is written at `path`.
 */
export async function captureScreenshot(url, options = {}) {
    let context = null;
    try {
        const browser = await getBrowser();
        context = await browser.newContext({
            userAgent: 'Mozilla/5.0 (compatible; ClaudeMCP-Bot/1.0)'
        });
        const page = await context.newPage();
        await page.goto(url, { waitUntil: 'networkidle' });
        if (options.waitFor) {
            // waitFor is given in seconds; Playwright expects milliseconds.
            await page.waitForTimeout(options.waitFor * 1000);
        }
        const buffer = await page.screenshot({
            fullPage: options.fullPage || false,
            type: 'png'
        });
        return {
            success: true,
            base64: buffer.toString('base64'),
            path: `screenshot-${Date.now()}.png`
        };
    }
    catch (error) {
        return {
            success: false,
            error: error instanceof Error ? error.message : String(error)
        };
    }
    finally {
        // Always release the context; a close failure must not mask the result.
        if (context) {
            await context.close().catch(() => { });
        }
    }
}
248
/**
 * Generate a PDF from a webpage.
 *
 * Never rejects: failures are reported as `{ success: false, error }`. The
 * browser context is always closed, including on failure.
 *
 * @param url - Page to load; navigation waits for network idle.
 * @returns The A4 PDF (backgrounds printed) as base64 plus a generated file
 *   name. The PDF is only returned in memory; nothing is written at `path`.
 */
export async function generatePDF(url) {
    let context = null;
    try {
        const browser = await getBrowser();
        context = await browser.newContext({
            userAgent: 'Mozilla/5.0 (compatible; ClaudeMCP-Bot/1.0)'
        });
        const page = await context.newPage();
        await page.goto(url, { waitUntil: 'networkidle' });
        const buffer = await page.pdf({
            format: 'A4',
            printBackground: true
        });
        return {
            success: true,
            base64: buffer.toString('base64'),
            path: `output-${Date.now()}.pdf`
        };
    }
    catch (error) {
        return {
            success: false,
            error: error instanceof Error ? error.message : String(error)
        };
    }
    finally {
        // Always release the context; a close failure must not mask the result.
        if (context) {
            await context.close().catch(() => { });
        }
    }
}
280
/**
 * Execute custom JavaScript on a webpage.
 *
 * Scripts run sequentially in the same page. A script that throws records
 * `{ error: message }` at its position and does not stop the remaining
 * scripts. Never rejects: setup or navigation failures are reported as
 * `{ success: false, error }`. The browser context is always closed.
 *
 * NOTE(review): the scripts are evaluated verbatim in the page, so callers
 * must treat `scripts` as trusted input.
 *
 * @param url - Page to load before running the scripts.
 * @param scripts - JavaScript sources to evaluate in the page.
 * @returns One result entry per script, in input order.
 */
export async function executeJS(url, scripts) {
    let context = null;
    try {
        const browser = await getBrowser();
        context = await browser.newContext({
            userAgent: 'Mozilla/5.0 (compatible; ClaudeMCP-Bot/1.0)'
        });
        const page = await context.newPage();
        await page.goto(url, { waitUntil: 'domcontentloaded' });
        const results = [];
        for (const script of scripts) {
            try {
                results.push(await page.evaluate(script));
            }
            catch (e) {
                // Keep positions aligned with `scripts` by recording the failure.
                results.push({ error: e instanceof Error ? e.message : String(e) });
            }
        }
        return {
            success: true,
            results
        };
    }
    catch (error) {
        return {
            success: false,
            error: error instanceof Error ? error.message : String(error)
        };
    }
    finally {
        // Always release the context; a close failure must not mask the result.
        if (context) {
            await context.close().catch(() => { });
        }
    }
}
315
/**
 * Extract structured data using CSS selectors.
 *
 * For every element matching `baseSelector`, builds a record with one entry
 * per field. A field whose selector matches nothing inside the element is
 * `null`. Never rejects: failures are reported as `{ success: false, error }`.
 * The browser context is always closed, including on failure.
 *
 * @param url - Page to load.
 * @param baseSelector - Selector matching each repeated item.
 * @param fields - Field descriptors (`name`, `selector`, `type`, and
 *   `attribute` for `type: 'attribute'`).
 * @returns One record per matched item.
 */
export async function extractStructured(url, baseSelector, fields) {
    let context = null;
    try {
        const browser = await getBrowser();
        context = await browser.newContext({
            userAgent: 'Mozilla/5.0 (compatible; ClaudeMCP-Bot/1.0)'
        });
        const page = await context.newPage();
        await page.goto(url, { waitUntil: 'domcontentloaded' });
        // The callback runs inside the page, so its inputs are passed as a
        // single serializable argument.
        const data = await page.evaluate((args) => {
            const { baseSel, fieldList } = args;
            const results = [];
            document.querySelectorAll(baseSel).forEach((el) => {
                const item = {};
                fieldList.forEach((field) => {
                    const targetEl = el.querySelector(field.selector);
                    if (!targetEl) {
                        item[field.name] = null;
                        return;
                    }
                    switch (field.type) {
                        case 'text':
                            item[field.name] = targetEl.textContent?.trim() || '';
                            break;
                        case 'html':
                            item[field.name] = targetEl.innerHTML;
                            break;
                        case 'attribute':
                            item[field.name] = targetEl.getAttribute(field.attribute || '') || '';
                            break;
                    }
                });
                results.push(item);
            });
            return results;
        }, { baseSel: baseSelector, fieldList: fields });
        return {
            success: true,
            data: data
        };
    }
    catch (error) {
        return {
            success: false,
            error: error instanceof Error ? error.message : String(error)
        };
    }
    finally {
        // Always release the context; a close failure must not mask the result.
        if (context) {
            await context.close().catch(() => { });
        }
    }
}
369
// Built-in regex patterns, keyed by the name callers pass to extractRegex.
// Every pattern carries the `g` flag so String.prototype.match returns all
// occurrences.
const REGEX_PATTERNS = {
    email: /[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}/g,
    phone_intl: /[\+]?[(]?[0-9]{3}[)]?[-\s\.]?[0-9]{3}[-\s\.]?[0-9]{4,6}/g,
    phone_us: /\b\d{3}[-.]?\d{3}[-.]?\d{4}\b/g,
    url: /https?:\/\/[^\s<>{}|\\^`\[\]]+/g,
    ipv4: /\b(?:\d{1,3}\.){3}\d{1,3}\b/g,
    ipv6: /(([0-9a-fA-F]{1,4}:){7,7}[0-9a-fA-F]{1,4}|([0-9a-fA-F]{1,4}:){1,7}:|([0-9a-fA-F]{1,4}:){1,6}:[0-9a-fA-F]{1,4}|([0-9a-fA-F]{1,4}:){1,5}(:[0-9a-fA-F]{1,4}){1,2}|([0-9a-fA-F]{1,4}:){1,4}(:[0-9a-fA-F]{1,4}){1,3}|([0-9a-fA-F]{1,4}:){1,3}(:[0-9a-fA-F]{1,4}){1,4}|([0-9a-fA-F]{1,4}:){1,2}(:[0-9a-fA-F]{1,4}){1,5}|[0-9a-fA-F]{1,4}:((:[0-9a-fA-F]{1,4}){1,6})|:((:[0-9a-fA-F]{1,4}){1,7}|:)|fe80:(:[0-9a-fA-F]{0,4}){0,4}%[0-9a-zA-Z]{1,}|::(ffff(:0{1,4}){0,1}:){0,1}((25[0-5]|(2[0-4]|1{0,1}[0-9]){0,1}[0-9])\.){3,3}(25[0-5]|(2[0-4]|1{0,1}[0-9]){0,1}[0-9])|([0-9a-fA-F]{1,4}:){1,4}:((25[0-5]|(2[0-4]|1{0,1}[0-9]){0,1}[0-9])\.){3,3}(25[0-5]|(2[0-4]|1{0,1}[0-9]){0,1}[0-9]))\b/g,
    uuid: /[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}/gi,
    currency: /[$€£¥₹]\s?\d+(?:,\d{3})*(?:\.\d{2})?|\d+(?:,\d{3})*(?:\.\d{2})?\s?[$€£¥₹]/g,
    // No trailing \b: "%" is a non-word character, so a boundary after it
    // would only exist when a word character follows and "50% off" or a
    // trailing "50%" would never match.
    percentage: /\b\d+(?:\.\d+)?%/g,
    number: /\b\d+(?:,\d{3})*(?:\.\d+)?\b/g,
    date_iso: /\b\d{4}-\d{2}-\d{2}\b/g,
    date_us: /\b(?:0[1-9]|1[0-2])\/(?:0[1-9]|[12][0-9]|3[01])\/\d{4}\b/g,
    time_24h: /\b([01]?[0-9]|2[0-3]):[0-5][0-9]\b/g,
    postal_us: /\b\d{5}(-\d{4})?\b/g,
    postal_uk: /[A-Z]{1,2}\d[A-Z\d]? ?\d[A-Z]{2}/gi,
    hex_color: /#([0-9a-fA-F]{3}|[0-9a-fA-F]{6})\b/g,
    twitter_handle: /@[\w]{1,15}\b/g,
    hashtag: /#[\w]+/g,
    mac_addr: /([0-9A-Fa-f]{2}[:-]){5}([0-9A-Fa-f]{2})/g,
    iban: /[A-Z]{2}\d{2}[A-Z0-9]{11,30}/g,
    credit_card: /\b(?:\d[ -]*?){13,16}\b/g
};
393
/**
 * Extract data using regex patterns.
 *
 * Matches built-in and/or custom patterns against the page body's
 * `innerText` and returns the deduplicated matches per pattern name. Patterns
 * without matches are omitted. Never rejects: failures are reported as
 * `{ success: false, error }`. The browser context is always closed.
 *
 * @param url - Page to load.
 * @param patterns - Built-in pattern names, or `'all'` for every built-in
 *   pattern. Unknown names are ignored.
 * @param customPatterns - Map of result key to regex source, compiled with
 *   the `g` flag. Invalid sources are skipped.
 * @returns Matches keyed by pattern name.
 */
export async function extractRegex(url, patterns = [], customPatterns) {
    let context = null;
    try {
        const browser = await getBrowser();
        context = await browser.newContext({
            userAgent: 'Mozilla/5.0 (compatible; ClaudeMCP-Bot/1.0)'
        });
        const page = await context.newPage();
        await page.goto(url, { waitUntil: 'domcontentloaded' });
        const text = await page.evaluate(() => document.body.innerText);
        const matches = {};
        // Stores the unique matches of `regex` under `name`, if there are any.
        const collect = (name, regex) => {
            const found = text.match(regex);
            if (found) {
                matches[name] = [...new Set(found)];
            }
        };
        for (const patternName of patterns) {
            if (patternName === 'all') {
                for (const [name, regex] of Object.entries(REGEX_PATTERNS)) {
                    collect(name, regex);
                }
            }
            else if (Object.prototype.hasOwnProperty.call(REGEX_PATTERNS, patternName)) {
                // Own-property check: names such as "constructor" must not
                // resolve to members inherited from Object.prototype.
                collect(patternName, REGEX_PATTERNS[patternName]);
            }
        }
        if (customPatterns) {
            for (const [name, pattern] of Object.entries(customPatterns)) {
                let regex;
                try {
                    regex = new RegExp(pattern, 'g');
                }
                catch {
                    // Deliberate best-effort: skip invalid regex patterns.
                    continue;
                }
                collect(name, regex);
            }
        }
        return {
            success: true,
            matches
        };
    }
    catch (error) {
        return {
            success: false,
            error: error instanceof Error ? error.message : String(error)
        };
    }
    finally {
        // Always release the context; a close failure must not mask the result.
        if (context) {
            await context.close().catch(() => { });
        }
    }
}
454
+ //# sourceMappingURL=playwright-crawler.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"playwright-crawler.js","sourceRoot":"","sources":["../src/playwright-crawler.ts"],"names":[],"mappings":"AAAA,kDAAkD;AAClD,OAAO,KAAK,OAAO,MAAM,SAAS,CAAC;AA2BnC,+EAA+E;AAC/E,qBAAqB;AACrB,+EAA+E;AAE/E,IAAI,kBAAkB,GAAQ,IAAI,CAAC;AACnC,IAAI,eAAe,GAAQ,IAAI,CAAC;AAEhC,KAAK,UAAU,aAAa;IAC1B,IAAI,kBAAkB;QAAE,OAAO,kBAAkB,CAAC;IAElD,IAAI,CAAC;QACH,0CAA0C;QAC1C,kBAAkB,GAAG,MAAM,MAAM,CAAC,YAAY,CAAC,CAAC;QAChD,OAAO,kBAAkB,CAAC;IAC5B,CAAC;IAAC,OAAO,KAAK,EAAE,CAAC;QACf,MAAM,IAAI,KAAK,CAAC,gEAAgE,CAAC,CAAC;IACpF,CAAC;AACH,CAAC;AAED,KAAK,UAAU,UAAU;IACvB,IAAI,eAAe;QAAE,OAAO,eAAe,CAAC;IAE5C,MAAM,EAAE,GAAG,MAAM,aAAa,EAAE,CAAC;IACjC,eAAe,GAAG,MAAM,EAAE,CAAC,QAAQ,CAAC,MAAM,CAAC;QACzC,QAAQ,EAAE,IAAI;QACd,IAAI,EAAE,CAAC,cAAc,EAAE,0BAA0B,CAAC;KACnD,CAAC,CAAC;IAEH,OAAO,eAAe,CAAC;AACzB,CAAC;AAED;;GAEG;AACH,MAAM,CAAC,KAAK,UAAU,mBAAmB,CACvC,GAAW,EACX,UAAkC,EAAE;IAEpC,MAAM,SAAS,GAAG,IAAI,CAAC,GAAG,EAAE,CAAC;IAE7B,IAAI,CAAC;QACH,MAAM,EAAE,GAAG,MAAM,aAAa,EAAE,CAAC;QACjC,MAAM,OAAO,GAAG,MAAM,UAAU,EAAE,CAAC;QACnC,MAAM,OAAO,GAAG,MAAM,OAAO,CAAC,UAAU,CAAC;YACvC,SAAS,EAAE,6CAA6C;SACzD,CAAC,CAAC;QACH,MAAM,IAAI,GAAG,MAAM,OAAO,CAAC,OAAO,EAAE,CAAC;QAErC,cAAc;QACd,MAAM,OAAO,GAAG,OAAO,CAAC,OAAO,IAAI,KAAK,CAAC;QACzC,IAAI,CAAC,iBAAiB,CAAC,OAAO,CAAC,CAAC;QAEhC,kBAAkB;QAClB,MAAM,IAAI,CAAC,IAAI,CAAC,GAAG,EAAE,EAAE,SAAS,EAAE,kBAAkB,EAAE,OAAO,EAAE,CAAC,CAAC;QAEjE,iCAAiC;QACjC,IAAI,OAAO,CAAC,eAAe,EAAE,CAAC;YAC5B,MAAM,IAAI,CAAC,eAAe,CAAC,OAAO,CAAC,eAAe,EAAE,EAAE,OAAO,EAAE,CAAC,CAAC;QACnE,CAAC;QAED,yCAAyC;QACzC,IAAI,OAAO,CAAC,SAAS,EAAE,CAAC;YACtB,MAAM,IAAI,CAAC,QAAQ,CAAC,OAAO,CAAC,SAAS,CAAC,CAAC;QACzC,CAAC;QAED,kBAAkB;QAClB,MAAM,OAAO,GAAG,MAAM,IAAI,CAAC,QAAQ,CAAC,GAAG,EAAE;YACvC,OAAO;gBACL,KAAK,EAAG,QAAgB,CAAC,KAAK;gBAC9B,IAAI,EAAG,QAAgB,CAAC,eAAe,CAAC,SAAS;gBACjD,IAAI,EAAG,QAAgB,CAAC,IAAI,CAAC,SAAS;aACvC,CAAC;QACJ,CAAC,CAAC,CAAC;QAEH,sBAAsB;QACtB,MAAM,KAAK,GAAG,MAAM,IAAI,CAAC,QAAQ,CAAC,GAAG,EAAE;YACrC,MAAM,OAAO,GAAG,KAAK,CAAC,IAAI,CAAE,QAAgB,CAAC,gBAAgB,CAAC,SAAS,CAAC,CAAC,CAAC;YAC1E,IAAI,QAAQ,GAA
G,CAAC,CAAC;YACjB,IAAI,QAAQ,GAAG,CAAC,CAAC;YAEjB,OAAO,CAAC,OAAO,CAAC,CAAC,MAAW,EAAE,EAAE;gBAC9B,MAAM,IAAI,GAAG,MAAM,CAAC,YAAY,CAAC,MAAM,CAAC,CAAC;gBACzC,IAAI,IAAI,EAAE,CAAC;oBACT,IAAI,IAAI,CAAC,UAAU,CAAC,GAAG,CAAC,IAAI,IAAI,CAAC,UAAU,CAAC,MAAM,CAAC,QAAQ,CAAC,MAAM,CAAC,EAAE,CAAC;wBACpE,QAAQ,EAAE,CAAC;oBACb,CAAC;yBAAM,CAAC;wBACN,QAAQ,EAAE,CAAC;oBACb,CAAC;gBACH,CAAC;YACH,CAAC,CAAC,CAAC;YAEH,OAAO,EAAE,QAAQ,EAAE,QAAQ,EAAE,CAAC;QAChC,CAAC,CAAC,CAAC;QAEH,8BAA8B;QAC9B,IAAI,MAAM,GAAa,EAAE,CAAC;QAC1B,IAAI,OAAO,CAAC,aAAa,EAAE,CAAC;YAC1B,MAAM,GAAG,MAAM,IAAI,CAAC,QAAQ,CAAC,GAAG,EAAE;gBAChC,MAAM,IAAI,GAAG,KAAK,CAAC,IAAI,CAAE,QAAgB,CAAC,gBAAgB,CAAC,UAAU,CAAC,CAAC,CAAC;gBACxE,OAAO,IAAI,CAAC,GAAG,CAAC,CAAC,GAAQ,EAAE,EAAE,CAAC,GAAG,CAAC,GAAG,CAAC,CAAC,MAAM,CAAC,CAAC,GAAW,EAAE,EAAE,CAAC,GAAG,IAAI,CAAC,GAAG,CAAC,UAAU,CAAC,OAAO,CAAC,CAAC,CAAC;YAClG,CAAC,CAAC,CAAC;QACL,CAAC;QAED,8BAA8B;QAC9B,MAAM,CAAC,GAAG,OAAO,CAAC,IAAI,CAAC,OAAO,CAAC,IAAI,CAAC,CAAC;QAErC,mCAAmC;QACnC,CAAC,CAAC,oCAAoC,CAAC,CAAC,MAAM,EAAE,CAAC;QAEjD,sBAAsB;QACtB,MAAM,QAAQ,GAAG,cAAc,CAAC,CAAC,EAAE,OAAO,CAAC,KAAK,CAAC,CAAC;QAElD,+BAA+B;QAC/B,IAAI,OAAO,CAAC,UAAU,EAAE,CAAC;YACvB,MAAM,IAAI,CAAC,UAAU,CAAC,EAAE,IAAI,EAAE,cAAc,IAAI,CAAC,GAAG,EAAE,MAAM,EAAE,QAAQ,EAAE,KAAK,EAAE,CAAC,CAAC;QACnF,CAAC;QAED,MAAM,OAAO,CAAC,KAAK,EAAE,CAAC;QAEtB,MAAM,UAAU,GAAG,IAAI,CAAC,GAAG,EAAE,GAAG,SAAS,CAAC;QAE1C,OAAO;YACL,OAAO,EAAE,IAAI;YACb,GAAG;YACH,KAAK,EAAE,OAAO,CAAC,KAAK;YACpB,QAAQ;YACR,SAAS,EAAE,QAAQ,CAAC,KAAK,CAAC,KAAK,CAAC,CAAC,MAAM;YACvC,KAAK;YACL,MAAM,EAAE,OAAO,CAAC,aAAa,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,SAAS;YAClD,UAAU;SACX,CAAC;IAEJ,CAAC;IAAC,OAAO,KAAK,EAAE,CAAC;QACf,MAAM,UAAU,GAAG,IAAI,CAAC,GAAG,EAAE,GAAG,SAAS,CAAC;QAE1C,OAAO;YACL,OAAO,EAAE,KAAK;YACd,KAAK,EAAE,KAAK,YAAY,KAAK,CAAC,CAAC,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM,CAAC,KAAK,CAAC;YAC7D,UAAU;SACX,CAAC;IACJ,CAAC;AACH,CAAC;AAED;;GAEG;AACH,SAAS,cAAc,CAAC,CAAkC,EAAE,KAAa;IACvE,IAAI,QAAQ,GAAG,KAAK,KAAK,MAAM,CAAC;IAEhC,mBAAmB;IACnB,CAAC,CAAC,wBAAwB,CAAC,CAAC,IAAI,CAAC,CAAC,CAAS,EAAE,
EAAO,EAAE,EAAE;QACtD,MAAM,OAAO,GAAG,EAAE,CAAC,OAAO,CAAC;QAC3B,MAAM,KAAK,GAAG,OAAO,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC;QAChC,MAAM,IAAI,GAAG,CAAC,CAAC,EAAE,CAAC,CAAC,IAAI,EAAE,CAAC,IAAI,EAAE,CAAC;QACjC,QAAQ,IAAI,GAAG,GAAG,CAAC,MAAM,CAAC,QAAQ,CAAC,KAAK,CAAC,CAAC,IAAI,IAAI,MAAM,CAAC;IAC3D,CAAC,CAAC,CAAC;IAEH,qBAAqB;IACrB,CAAC,CAAC,GAAG,CAAC,CAAC,IAAI,CAAC,CAAC,CAAS,EAAE,EAAO,EAAE,EAAE;QACjC,MAAM,IAAI,GAAG,CAAC,CAAC,EAAE,CAAC,CAAC,IAAI,EAAE,CAAC,IAAI,EAAE,CAAC;QACjC,IAAI,IAAI,EAAE,CAAC;YACT,QAAQ,IAAI,GAAG,IAAI,MAAM,CAAC;QAC5B,CAAC;IACH,CAAC,CAAC,CAAC;IAEH,gBAAgB;IAChB,CAAC,CAAC,QAAQ,CAAC,CAAC,IAAI,CAAC,CAAC,CAAS,EAAE,EAAO,EAAE,EAAE;QACtC,MAAM,SAAS,GAAG,EAAE,CAAC,OAAO,KAAK,IAAI,CAAC;QACtC,MAAM,KAAK,GAAG,CAAC,CAAC,EAAE,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;QAE/B,KAAK,CAAC,IAAI,CAAC,CAAC,KAAa,EAAE,CAAM,EAAE,EAAE;YACnC,MAAM,IAAI,GAAG,CAAC,CAAC,EAAE,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC,EAAE,CAAC,KAAK,CAAC,CAAC,IAAI,EAAE,CAAC,IAAI,EAAE,CAAC;YACtD,MAAM,MAAM,GAAG,SAAS,CAAC,CAAC,CAAC,GAAG,KAAK,GAAG,CAAC,GAAG,CAAC,CAAC,CAAC,GAAG,CAAC;YACjD,QAAQ,IAAI,GAAG,MAAM,IAAI,IAAI,IAAI,CAAC;QACpC,CAAC,CAAC,CAAC;QACH,QAAQ,IAAI,IAAI,CAAC;IACnB,CAAC,CAAC,CAAC;IAEH,sBAAsB;IACtB,CAAC,CAAC,KAAK,CAAC,CAAC,IAAI,CAAC,CAAC,CAAS,EAAE,EAAO,EAAE,EAAE;QACnC,MAAM,IAAI,GAAG,CAAC,CAAC,EAAE,CAAC,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC,IAAI,EAAE,CAAC,IAAI,EAAE,CAAC;QAC9C,QAAQ,IAAI,WAAW,IAAI,cAAc,CAAC;IAC5C,CAAC,CAAC,CAAC;IAEH,gBAAgB;IAChB,CAAC,CAAC,SAAS,CAAC,CAAC,IAAI,CAAC,CAAC,CAAS,EAAE,EAAO,EAAE,EAAE;QACvC,MAAM,IAAI,GAAG,CAAC,CAAC,EAAE,CAAC,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC;QAChC,MAAM,IAAI,GAAG,CAAC,CAAC,EAAE,CAAC,CAAC,IAAI,EAAE,CAAC,IAAI,EAAE,CAAC;QACjC,IAAI,IAAI,IAAI,IAAI,EAAE,CAAC;YACjB,QAAQ,IAAI,IAAI,IAAI,KAAK,IAAI,GAAG,CAAC;QACnC,CAAC;IACH,CAAC,CAAC,CAAC;IAEH,0BAA0B;IAC1B,CAAC,CAAC,WAAW,CAAC,CAAC,IAAI,CAAC,CAAC,CAAS,EAAE,EAAO,EAAE,EAAE;QACzC,MAAM,IAAI,GAAG,CAAC,CAAC,EAAE,CAAC,CAAC,IAAI,EAAE,CAAC,IAAI,EAAE,CAAC;QACjC,QAAQ,IAAI,KAAK,IAAI,IAAI,CAAC;IAC5B,CAAC,CAAC,CAAC;IAEH,CAAC,CAAC,OAAO,CAAC,CAAC,IAAI,CAAC,CAAC,CAAS,EAAE,EAAO,EAA
E,EAAE;QACrC,MAAM,IAAI,GAAG,CAAC,CAAC,EAAE,CAAC,CAAC,IAAI,EAAE,CAAC,IAAI,EAAE,CAAC;QACjC,QAAQ,IAAI,IAAI,IAAI,GAAG,CAAC;IAC1B,CAAC,CAAC,CAAC;IAEH,6DAA6D;IAC7D,IAAI,QAAQ,KAAK,KAAK,KAAK,MAAM,EAAE,CAAC;QAClC,MAAM,QAAQ,GAAG,CAAC,CAAC,MAAM,CAAC,CAAC,IAAI,EAAE,CAAC,IAAI,EAAE,CAAC;QACzC,MAAM,UAAU,GAAG,QAAQ,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC;QAC3C,QAAQ,GAAG,KAAK,KAAK,MAAM,CAAC;QAC5B,UAAU,CAAC,OAAO,CAAC,CAAC,CAAS,EAAE,EAAE;YAC/B,IAAI,CAAC,CAAC,IAAI,EAAE,EAAE,CAAC;gBACb,QAAQ,IAAI,GAAG,CAAC,CAAC,IAAI,EAAE,MAAM,CAAC;YAChC,CAAC;QACH,CAAC,CAAC,CAAC;IACL,CAAC;IAED,OAAO,QAAQ,CAAC;AAClB,CAAC;AAED;;GAEG;AACH,MAAM,CAAC,KAAK,UAAU,UAAU,CAC9B,IAAc,EACd,UAA6D,EAAE;IAE/D,MAAM,EAAE,WAAW,GAAG,CAAC,EAAE,GAAG,YAAY,EAAE,GAAG,OAAO,CAAC;IAErD,MAAM,OAAO,GAA4B,EAAE,CAAC;IAC5C,MAAM,KAAK,GAAG,CAAC,GAAG,IAAI,CAAC,CAAC;IAExB,MAAM,OAAO,GAAG,KAAK,CAAC,IAAI,CAAC,EAAE,MAAM,EAAE,IAAI,CAAC,GAAG,CAAC,WAAW,EAAE,IAAI,CAAC,MAAM,CAAC,EAAE,EAAE,KAAK,IAAI,EAAE;QACpF,OAAO,KAAK,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;YACxB,MAAM,GAAG,GAAG,KAAK,CAAC,KAAK,EAAG,CAAC;YAC3B,MAAM,MAAM,GAAG,MAAM,mBAAmB,CAAC,GAAG,EAAE,YAAY,CAAC,CAAC;YAC5D,OAAO,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC;QACvB,CAAC;IACH,CAAC,CAAC,CAAC;IAEH,MAAM,OAAO,CAAC,GAAG,CAAC,OAAO,CAAC,CAAC;IAE3B,OAAO,OAAO,CAAC;AACjB,CAAC;AAED;;GAEG;AACH,MAAM,CAAC,KAAK,UAAU,OAAO;IAC3B,IAAI,eAAe,EAAE,CAAC;QACpB,MAAM,eAAe,CAAC,KAAK,EAAE,CAAC;QAC9B,eAAe,GAAG,IAAI,CAAC;IACzB,CAAC;AACH,CAAC;AAoBD;;GAEG;AACH,MAAM,CAAC,KAAK,UAAU,iBAAiB,CACrC,GAAW,EACX,UAAoD,EAAE;IAEtD,IAAI,CAAC;QACH,MAAM,EAAE,GAAG,MAAM,aAAa,EAAE,CAAC;QACjC,MAAM,OAAO,GAAG,MAAM,UAAU,EAAE,CAAC;QACnC,MAAM,OAAO,GAAG,MAAM,OAAO,CAAC,UAAU,CAAC;YACvC,SAAS,EAAE,6CAA6C;SACzD,CAAC,CAAC;QACH,MAAM,IAAI,GAAG,MAAM,OAAO,CAAC,OAAO,EAAE,CAAC;QAErC,MAAM,IAAI,CAAC,IAAI,CAAC,GAAG,EAAE,EAAE,SAAS,EAAE,aAAa,EAAE,CAAC,CAAC;QAEnD,+BAA+B;QAC/B,IAAI,OAAO,CAAC,OAAO,EAAE,CAAC;YACpB,MAAM,IAAI,CAAC,cAAc,CAAC,OAAO,CAAC,OAAO,GAAG,IAAI,CAAC,CAAC;QACpD,CAAC;QAED,+BAA+B;QAC/B,MAAM,MAAM,GAAG,MAAM,IAAI,CAAC,UAAU,CAAC;YACnC,QAAQ,EAAE,OAAO,CAAC,QAAQ,IAAI,KAAK;YACnC,IAAI,EAAE,KAAK;SA
CZ,CAAC,CAAC;QAEH,MAAM,OAAO,CAAC,KAAK,EAAE,CAAC;QAEtB,MAAM,MAAM,GAAG,MAAM,CAAC,QAAQ,CAAC,QAAQ,CAAC,CAAC;QAEzC,OAAO;YACL,OAAO,EAAE,IAAI;YACb,MAAM;YACN,IAAI,EAAE,cAAc,IAAI,CAAC,GAAG,EAAE,MAAM;SACrC,CAAC;IACJ,CAAC;IAAC,OAAO,KAAK,EAAE,CAAC;QACf,OAAO;YACL,OAAO,EAAE,KAAK;YACd,KAAK,EAAE,KAAK,YAAY,KAAK,CAAC,CAAC,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM,CAAC,KAAK,CAAC;SAC9D,CAAC;IACJ,CAAC;AACH,CAAC;AAED;;GAEG;AACH,MAAM,CAAC,KAAK,UAAU,WAAW,CAAC,GAAW;IAC3C,IAAI,CAAC;QACH,MAAM,EAAE,GAAG,MAAM,aAAa,EAAE,CAAC;QACjC,MAAM,OAAO,GAAG,MAAM,UAAU,EAAE,CAAC;QACnC,MAAM,OAAO,GAAG,MAAM,OAAO,CAAC,UAAU,CAAC;YACvC,SAAS,EAAE,6CAA6C;SACzD,CAAC,CAAC;QACH,MAAM,IAAI,GAAG,MAAM,OAAO,CAAC,OAAO,EAAE,CAAC;QAErC,MAAM,IAAI,CAAC,IAAI,CAAC,GAAG,EAAE,EAAE,SAAS,EAAE,aAAa,EAAE,CAAC,CAAC;QAEnD,yBAAyB;QACzB,MAAM,MAAM,GAAG,MAAM,IAAI,CAAC,GAAG,CAAC;YAC5B,MAAM,EAAE,IAAI;YACZ,eAAe,EAAE,IAAI;SACtB,CAAC,CAAC;QAEH,MAAM,OAAO,CAAC,KAAK,EAAE,CAAC;QAEtB,MAAM,MAAM,GAAG,MAAM,CAAC,QAAQ,CAAC,QAAQ,CAAC,CAAC;QAEzC,OAAO;YACL,OAAO,EAAE,IAAI;YACb,MAAM;YACN,IAAI,EAAE,UAAU,IAAI,CAAC,GAAG,EAAE,MAAM;SACjC,CAAC;IACJ,CAAC;IAAC,OAAO,KAAK,EAAE,CAAC;QACf,OAAO;YACL,OAAO,EAAE,KAAK;YACd,KAAK,EAAE,KAAK,YAAY,KAAK,CAAC,CAAC,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM,CAAC,KAAK,CAAC;SAC9D,CAAC;IACJ,CAAC;AACH,CAAC;AAYD;;GAEG;AACH,MAAM,CAAC,KAAK,UAAU,SAAS,CAC7B,GAAW,EACX,OAAiB;IAEjB,IAAI,CAAC;QACH,MAAM,EAAE,GAAG,MAAM,aAAa,EAAE,CAAC;QACjC,MAAM,OAAO,GAAG,MAAM,UAAU,EAAE,CAAC;QACnC,MAAM,OAAO,GAAG,MAAM,OAAO,CAAC,UAAU,CAAC;YACvC,SAAS,EAAE,6CAA6C;SACzD,CAAC,CAAC;QACH,MAAM,IAAI,GAAG,MAAM,OAAO,CAAC,OAAO,EAAE,CAAC;QAErC,MAAM,IAAI,CAAC,IAAI,CAAC,GAAG,EAAE,EAAE,SAAS,EAAE,kBAAkB,EAAE,CAAC,CAAC;QAExD,MAAM,OAAO,GAAc,EAAE,CAAC;QAE9B,KAAK,MAAM,MAAM,IAAI,OAAO,EAAE,CAAC;YAC7B,IAAI,CAAC;gBACH,MAAM,MAAM,GAAG,MAAM,IAAI,CAAC,QAAQ,CAAC,MAAM,CAAC,CAAC;gBAC3C,OAAO,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC;YACvB,CAAC;YAAC,OAAO,CAAC,EAAE,CAAC;gBACX,OAAO,CAAC,IAAI,CAAC,EAAE,KAAK,EAAE,CAAC,YAAY,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC;YACtE,CAAC;QACH,CA
AC;QAED,MAAM,OAAO,CAAC,KAAK,EAAE,CAAC;QAEtB,OAAO;YACL,OAAO,EAAE,IAAI;YACb,OAAO;SACR,CAAC;IACJ,CAAC;IAAC,OAAO,KAAK,EAAE,CAAC;QACf,OAAO;YACL,OAAO,EAAE,KAAK;YACd,KAAK,EAAE,KAAK,YAAY,KAAK,CAAC,CAAC,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM,CAAC,KAAK,CAAC;SAC9D,CAAC;IACJ,CAAC;AACH,CAAC;AAmBD;;GAEG;AACH,MAAM,CAAC,KAAK,UAAU,iBAAiB,CACrC,GAAW,EACX,YAAoB,EACpB,MAAyB;IAEzB,IAAI,CAAC;QACH,MAAM,EAAE,GAAG,MAAM,aAAa,EAAE,CAAC;QACjC,MAAM,OAAO,GAAG,MAAM,UAAU,EAAE,CAAC;QACnC,MAAM,OAAO,GAAG,MAAM,OAAO,CAAC,UAAU,CAAC;YACvC,SAAS,EAAE,6CAA6C;SACzD,CAAC,CAAC;QACH,MAAM,IAAI,GAAG,MAAM,OAAO,CAAC,OAAO,EAAE,CAAC;QAErC,MAAM,IAAI,CAAC,IAAI,CAAC,GAAG,EAAE,EAAE,SAAS,EAAE,kBAAkB,EAAE,CAAC,CAAC;QAExD,MAAM,IAAI,GAAG,MAAM,IAAI,CAAC,QAAQ,CAAC,CAAC,IAAuD,EAAE,EAAE;YAC3F,MAAM,EAAE,OAAO,EAAE,SAAS,EAAE,GAAG,IAAI,CAAC;YACpC,MAAM,OAAO,GAA8B,EAAE,CAAC;YAC9C,MAAM,YAAY,GAAG,QAAQ,CAAC,gBAAgB,CAAC,OAAO,CAAC,CAAC;YAExD,YAAY,CAAC,OAAO,CAAC,CAAC,EAAE,EAAE,EAAE;gBAC1B,MAAM,IAAI,GAA4B,EAAE,CAAC;gBAEzC,SAAS,CAAC,OAAO,CAAC,CAAC,KAAK,EAAE,EAAE;oBAC1B,MAAM,QAAQ,GAAI,EAAc,CAAC,aAAa,CAAC,KAAK,CAAC,QAAQ,CAAC,CAAC;oBAE/D,IAAI,QAAQ,EAAE,CAAC;wBACb,QAAQ,KAAK,CAAC,IAAI,EAAE,CAAC;4BACnB,KAAK,MAAM;gCACT,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC,GAAG,QAAQ,CAAC,WAAW,EAAE,IAAI,EAAE,IAAI,EAAE,CAAC;gCACtD,MAAM;4BACR,KAAK,MAAM;gCACT,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC,GAAG,QAAQ,CAAC,SAAS,CAAC;gCACtC,MAAM;4BACR,KAAK,WAAW;gCACd,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC,GAAG,QAAQ,CAAC,YAAY,CAAC,KAAK,CAAC,SAAS,IAAI,EAAE,CAAC,IAAI,EAAE,CAAC;gCACtE,MAAM;wBACV,CAAC;oBACH,CAAC;yBAAM,CAAC;wBACN,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC,GAAG,IAAI,CAAC;oBAC1B,CAAC;gBACH,CAAC,CAAC,CAAC;gBAEH,OAAO,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;YACrB,CAAC,CAAC,CAAC;YAEH,OAAO,OAAO,CAAC;QACjB,CAAC,EAAE,EAAE,OAAO,EAAE,YAAY,EAAE,SAAS,EAAE,MAAM,EAAE,CAAC,CAAC;QAEjD,MAAM,OAAO,CAAC,KAAK,EAAE,CAAC;QAEtB,OAAO;YACL,OAAO,EAAE,IAAI;YACb,IAAI,EAAE,IAAiC;SACxC,CAAC;IACJ,CAAC;IAAC,OAAO,KAAK,EAAE,CAAC;QACf,OAAO;YACL,OAAO,EAAE,KAAK;YACd,KAAK,EAAE,KAAK,YAAY,KAAK,CAAC,CAAC,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM,CAAC,KAAK,CAAC;S
AC9D,CAAC;IACJ,CAAC;AACH,CAAC;AAYD,0BAA0B;AAC1B,MAAM,cAAc,GAA2B;IAC7C,KAAK,EAAE,iDAAiD;IACxD,UAAU,EAAE,0DAA0D;IACtE,QAAQ,EAAE,gCAAgC;IAC1C,GAAG,EAAE,iCAAiC;IACtC,IAAI,EAAE,8BAA8B;IACpC,IAAI,EAAE,wpBAAwpB;IAC9pB,IAAI,EAAE,gEAAgE;IACtE,QAAQ,EAAE,4EAA4E;IACtF,UAAU,EAAE,mBAAmB;IAC/B,MAAM,EAAE,+BAA+B;IACvC,QAAQ,EAAE,wBAAwB;IAClC,OAAO,EAAE,2DAA2D;IACpE,QAAQ,EAAE,qCAAqC;IAC/C,SAAS,EAAE,qBAAqB;IAChC,SAAS,EAAE,oCAAoC;IAC/C,SAAS,EAAE,qCAAqC;IAChD,cAAc,EAAE,gBAAgB;IAChC,OAAO,EAAE,SAAS;IAClB,QAAQ,EAAE,0CAA0C;IACpD,IAAI,EAAE,+BAA+B;IACrC,WAAW,EAAE,0BAA0B;CACxC,CAAC;AAEF;;GAEG;AACH,MAAM,CAAC,KAAK,UAAU,YAAY,CAChC,GAAW,EACX,WAAqB,EAAE,EACvB,cAAuC;IAEvC,IAAI,CAAC;QACH,MAAM,EAAE,GAAG,MAAM,aAAa,EAAE,CAAC;QACjC,MAAM,OAAO,GAAG,MAAM,UAAU,EAAE,CAAC;QACnC,MAAM,OAAO,GAAG,MAAM,OAAO,CAAC,UAAU,CAAC;YACvC,SAAS,EAAE,6CAA6C;SACzD,CAAC,CAAC;QACH,MAAM,IAAI,GAAG,MAAM,OAAO,CAAC,OAAO,EAAE,CAAC;QAErC,MAAM,IAAI,CAAC,IAAI,CAAC,GAAG,EAAE,EAAE,SAAS,EAAE,kBAAkB,EAAE,CAAC,CAAC;QAExD,gBAAgB;QAChB,MAAM,IAAI,GAAG,MAAM,IAAI,CAAC,QAAQ,CAAC,GAAG,EAAE,CAAC,QAAQ,CAAC,IAAI,CAAC,SAAS,CAAC,CAAC;QAEhE,MAAM,OAAO,GAA6B,EAAE,CAAC;QAE7C,4BAA4B;QAC5B,KAAK,MAAM,WAAW,IAAI,QAAQ,EAAE,CAAC;YACnC,IAAI,WAAW,KAAK,KAAK,EAAE,CAAC;gBAC1B,4BAA4B;gBAC5B,KAAK,MAAM,CAAC,IAAI,EAAE,KAAK,CAAC,IAAI,MAAM,CAAC,OAAO,CAAC,cAAc,CAAC,EAAE,CAAC;oBAC3D,MAAM,KAAK,GAAG,IAAI,CAAC,KAAK,CAAC,KAAK,CAAoB,CAAC;oBACnD,IAAI,KAAK,EAAE,CAAC;wBACV,OAAO,CAAC,IAAI,CAAC,GAAG,CAAC,GAAG,IAAI,GAAG,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,cAAc;oBACrD,CAAC;gBACH,CAAC;YACH,CAAC;iBAAM,IAAI,cAAc,CAAC,WAAW,CAAC,EAAE,CAAC;gBACvC,MAAM,KAAK,GAAG,IAAI,CAAC,KAAK,CAAC,cAAc,CAAC,WAAW,CAAC,CAAoB,CAAC;gBACzE,IAAI,KAAK,EAAE,CAAC;oBACV,OAAO,CAAC,WAAW,CAAC,GAAG,CAAC,GAAG,IAAI,GAAG,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,cAAc;gBAC5D,CAAC;YACH,CAAC;QACH,CAAC;QAED,0BAA0B;QAC1B,IAAI,cAAc,EAAE,CAAC;YACnB,KAAK,MAAM,CAAC,IAAI,EAAE,OAAO,CAAC,IAAI,MAAM,CAAC,OAAO,CAAC,cAAc,CAAC,EAAE,CAAC;gBAC7D,IAAI,CAAC;oBACH,MAAM,KAAK,GAAG,IAAI,MAAM,CAAC,OAAO,EAAE,GAAG,CAAC,CAAC;oBACvC,MAAM,KAAK,GAAG,IAAI,CAAC,KAAK,CAAC,KAAK,CAAoB,C
AAC;oBACnD,IAAI,KAAK,EAAE,CAAC;wBACV,OAAO,CAAC,IAAI,CAAC,GAAG,CAAC,GAAG,IAAI,GAAG,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,cAAc;oBACrD,CAAC;gBACH,CAAC;gBAAC,MAAM,CAAC;oBACP,8BAA8B;gBAChC,CAAC;YACH,CAAC;QACH,CAAC;QAED,MAAM,OAAO,CAAC,KAAK,EAAE,CAAC;QAEtB,OAAO;YACL,OAAO,EAAE,IAAI;YACb,OAAO;SACR,CAAC;IACJ,CAAC;IAAC,OAAO,KAAK,EAAE,CAAC;QACf,OAAO;YACL,OAAO,EAAE,KAAK;YACd,KAAK,EAAE,KAAK,YAAY,KAAK,CAAC,CAAC,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM,CAAC,KAAK,CAAC;SAC9D,CAAC;IACJ,CAAC;AACH,CAAC"}
@@ -0,0 +1,58 @@
1
+ export interface Document {
2
+ id: string;
3
+ content: string;
4
+ metadata?: Record<string, unknown>;
5
+ }
6
+ export interface RankedDocument {
7
+ doc: Document;
8
+ score: number;
9
+ breakdown?: {
10
+ bm25: number;
11
+ freshness: number;
12
+ domain: number;
13
+ };
14
+ }
15
+ export interface HybridScoreConfig {
16
+ alpha: number;
17
+ beta: number;
18
+ gamma: number;
19
+ }
20
+ export interface SearchResult {
21
+ position: number;
22
+ score: number;
23
+ title: string;
24
+ url: string;
25
+ snippet: string;
26
+ provider: 'duckduckgo' | 'google';
27
+ domain: string;
28
+ publishedDate?: string;
29
+ }
30
+ export interface RawSearchResult {
31
+ title: string;
32
+ url: string;
33
+ description: string;
34
+ provider: 'google' | 'duckduckgo';
35
+ }
36
+ export declare class BM25Ranker {
37
+ private bm25;
38
+ private k1;
39
+ private b;
40
+ constructor(k1?: number, b?: number);
41
+ indexDocuments(documents: Document[]): void;
42
+ indexSearchResults(results: RawSearchResult[]): void;
43
+ rank(query: string, documents: Document[]): RankedDocument[];
44
+ rankSearchResults(query: string, results: RawSearchResult[]): SearchResult[];
45
+ private extractDomain;
46
+ }
47
+ export declare class HybridRanker {
48
+ private bm25Ranker;
49
+ private config;
50
+ constructor(config?: Partial<HybridScoreConfig>);
51
+ rank(query: string, documents: Document[]): RankedDocument[];
52
+ rankSearchResults(query: string, results: RawSearchResult[]): SearchResult[];
53
+ private calculateFreshnessScore;
54
+ private calculateDomainScore;
55
+ private extractDomain;
56
+ }
57
+ export declare function rerankWithFullContent(query: string, results: SearchResult[], extractedContent: Map<string, string>): SearchResult[];
58
+ //# sourceMappingURL=ranking.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"ranking.d.ts","sourceRoot":"","sources":["../src/ranking.ts"],"names":[],"mappings":"AAOA,MAAM,WAAW,QAAQ;IACvB,EAAE,EAAE,MAAM,CAAC;IACX,OAAO,EAAE,MAAM,CAAC;IAChB,QAAQ,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,CAAC;CACpC;AAED,MAAM,WAAW,cAAc;IAC7B,GAAG,EAAE,QAAQ,CAAC;IACd,KAAK,EAAE,MAAM,CAAC;IACd,SAAS,CAAC,EAAE;QACV,IAAI,EAAE,MAAM,CAAC;QACb,SAAS,EAAE,MAAM,CAAC;QAClB,MAAM,EAAE,MAAM,CAAC;KAChB,CAAC;CACH;AAED,MAAM,WAAW,iBAAiB;IAChC,KAAK,EAAE,MAAM,CAAC;IACd,IAAI,EAAE,MAAM,CAAC;IACb,KAAK,EAAE,MAAM,CAAC;CACf;AAED,MAAM,WAAW,YAAY;IAC3B,QAAQ,EAAE,MAAM,CAAC;IACjB,KAAK,EAAE,MAAM,CAAC;IACd,KAAK,EAAE,MAAM,CAAC;IACd,GAAG,EAAE,MAAM,CAAC;IACZ,OAAO,EAAE,MAAM,CAAC;IAChB,QAAQ,EAAE,YAAY,GAAG,QAAQ,CAAC;IAClC,MAAM,EAAE,MAAM,CAAC;IACf,aAAa,CAAC,EAAE,MAAM,CAAC;CACxB;AAED,MAAM,WAAW,eAAe;IAC9B,KAAK,EAAE,MAAM,CAAC;IACd,GAAG,EAAE,MAAM,CAAC;IACZ,WAAW,EAAE,MAAM,CAAC;IACpB,QAAQ,EAAE,QAAQ,GAAG,YAAY,CAAC;CACnC;AAMD,qBAAa,UAAU;IACrB,OAAO,CAAC,IAAI,CAAqB;IACjC,OAAO,CAAC,EAAE,CAAe;IACzB,OAAO,CAAC,CAAC,CAAgB;gBAEb,EAAE,CAAC,EAAE,MAAM,EAAE,CAAC,CAAC,EAAE,MAAM;IAKnC,cAAc,CAAC,SAAS,EAAE,QAAQ,EAAE,GAAG,IAAI;IAM3C,kBAAkB,CAAC,OAAO,EAAE,eAAe,EAAE,GAAG,IAAI;IASpD,IAAI,CAAC,KAAK,EAAE,MAAM,EAAE,SAAS,EAAE,QAAQ,EAAE,GAAG,cAAc,EAAE;IAe5D,iBAAiB,CAAC,KAAK,EAAE,MAAM,EAAE,OAAO,EAAE,eAAe,EAAE,GAAG,YAAY,EAAE;IAsB5E,OAAO,CAAC,aAAa;CAOtB;AAMD,qBAAa,YAAY;IACvB,OAAO,CAAC,UAAU,CAAa;IAC/B,OAAO,CAAC,MAAM,CAAoB;gBAEtB,MAAM,CAAC,EAAE,OAAO,CAAC,iBAAiB,CAAC;IAU/C,IAAI,CAAC,KAAK,EAAE,MAAM,EAAE,SAAS,EAAE,QAAQ,EAAE,GAAG,cAAc,EAAE;IAuB5D,iBAAiB,CAAC,KAAK,EAAE,MAAM,EAAE,OAAO,EAAE,eAAe,EAAE,GAAG,YAAY,EAAE;IAqB5E,OAAO,CAAC,uBAAuB;IAgB/B,OAAO,CAAC,oBAAoB;IAkD5B,OAAO,CAAC,aAAa;CAOtB;AAMD,wBAAgB,qBAAqB,CACnC,KAAK,EAAE,MAAM,EACb,OAAO,EAAE,YAAY,EAAE,EACvB,gBAAgB,EAAE,GAAG,CAAC,MAAM,EAAE,MAAM,CAAC,GACpC,YAAY,EAAE,CAiChB"}