one-search-mcp 1.0.5 → 1.0.9

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.js CHANGED
@@ -5,9 +5,109 @@ import { Server } from "@modelcontextprotocol/sdk/server/index.js";
5
5
  import { CallToolRequestSchema, ListToolsRequestSchema } from "@modelcontextprotocol/sdk/types.js";
6
6
  import { StdioServerTransport } from "@modelcontextprotocol/sdk/server/stdio.js";
7
7
 
8
- // src/search.ts
8
+ // src/search/bing.ts
9
/**
 * Runs a web search against the Bing Web Search API and normalises the hits.
 *
 * @param {object} options
 * @param {string} options.query - Search terms, sent as `q`.
 * @param {number} [options.limit=10] - Results per page, sent as `count`.
 * @param {number} [options.safeSearch=0] - Index into ["Off", "Moderate", "Strict"];
 *   an out-of-range index omits the `safeSearch` parameter entirely.
 * @param {number} [options.page=1] - 1-based page number; converted to `offset`.
 * @param {string} [options.apiUrl] - Endpoint to call; defaults to the public v7.0 endpoint.
 * @param {string} options.apiKey - Subscription key sent in `Ocp-Apim-Subscription-Key`.
 * @param {string} [options.language] - Sent as `mkt`; omitted when not provided.
 * @returns {Promise<{ results: object[], success: true }>} Normalised results
 *   (empty array when the response has no `webPages.value`).
 * @throws {Error} If `apiKey` is missing, if the HTTP response is not OK, or if
 *   the request/parsing fails (the original error is rethrown).
 */
async function bingSearch(options) {
  const {
    query,
    limit = 10,
    safeSearch = 0,
    page = 1,
    apiUrl = "https://api.bing.microsoft.com/v7.0/search",
    apiKey,
    language
  } = options;
  const bingSafeSearchOptions = ["Off", "Moderate", "Strict"];
  if (!apiKey) {
    throw new Error("Bing API key is required");
  }
  const searchOptions = {
    q: query,
    count: limit,
    offset: (page - 1) * limit,
    mkt: language,
    safeSearch: bingSafeSearchOptions[safeSearch]
  };
  try {
    const queryParams = new URLSearchParams();
    for (const [key, value] of Object.entries(searchOptions)) {
      // Skip absent values (undefined or null) instead of crashing on `.toString()`.
      if (value != null) {
        queryParams.set(key, String(value));
      }
    }
    const res = await fetch(`${apiUrl}?${queryParams}`, {
      method: "GET",
      headers: {
        "Content-Type": "application/json",
        "Ocp-Apim-Subscription-Key": apiKey
      }
    });
    if (!res.ok) {
      throw new Error(`Bing search error: ${res.status} ${res.statusText}`);
    }
    const data = await res.json();
    const serp = data.webPages?.value ?? [];
    const results = serp.map((item) => ({
      title: item.name,
      snippet: item.snippet,
      url: item.url,
      source: item.siteName,
      thumbnailUrl: item.thumbnailUrl,
      language: item.language,
      image: null,
      video: null,
      engine: "bing"
    }));
    return {
      results,
      success: true
    };
  } catch (err) {
    const msg = err instanceof Error ? err.message : "Bing search error.";
    // Diagnostics go to stderr: this bundle imports StdioServerTransport, and a
    // stdio MCP server must keep stdout free of anything but protocol messages.
    process.stderr.write(`${msg}\n`);
    throw err;
  }
}
62
+
63
+ // src/search/duckduckgo.ts
64
+ import * as DDG from "duck-duck-scrape";
65
+ import asyncRetry from "async-retry";
66
/**
 * Runs a DuckDuckGo search through `duck-duck-scrape`, retrying via `async-retry`,
 * and normalises the hits.
 *
 * @param {object} options
 * @param {string} options.query - Search terms.
 * @param {number} [options.timeout=10000] - Passed to the scraper as `response_timeout`.
 * @param {*} [options.safeSearch=DDG.SafeSearchType.OFF] - Safe-search level forwarded to the scraper.
 * @param {object} [options.retry={ retries: 3 }] - Options handed to `async-retry`.
 * @param {...*} [options.searchOptions] - Any remaining keys are forwarded to `DDG.search` unchanged.
 * @returns {Promise<{ results: object[], success: true }>} Normalised results
 *   (empty array when the scraper returns nothing).
 * @throws {*} Rethrows whatever the scraper/retry layer throws after logging it.
 */
async function duckDuckGoSearch(options) {
  try {
    const {
      query,
      timeout = 1e4,
      safeSearch = DDG.SafeSearchType.OFF,
      retry = { retries: 3 },
      ...searchOptions
    } = options;
    const res = await asyncRetry(
      () => DDG.search(
        query,
        { ...searchOptions, safeSearch },
        // needle options
        { response_timeout: timeout }
      ),
      retry
    );
    // Tolerate a missing response or a response without a `results` array.
    const hits = res?.results ?? [];
    return {
      results: hits.map((result) => ({
        title: result.title,
        snippet: result.description,
        url: result.url,
        source: result.hostname,
        image: null,
        video: null,
        engine: "duckduckgo"
      })),
      success: true
    };
  } catch (error) {
    const msg = error instanceof Error ? error.message : "DuckDuckGo search error.";
    // Diagnostics go to stderr: this bundle imports StdioServerTransport, and a
    // stdio MCP server must keep stdout free of anything but protocol messages.
    process.stderr.write(`${msg}\n`);
    throw error;
  }
}
108
+
109
+ // src/search/searxng.ts
9
110
  import url from "node:url";
10
- import { tavily } from "@tavily/core";
11
111
  async function searxngSearch(params) {
12
112
  try {
13
113
  const {
@@ -90,7 +190,9 @@ async function searxngSearch(params) {
90
190
  throw err;
91
191
  }
92
192
  }
93
- var tvly = null;
193
+
194
+ // src/search/tavily.ts
195
+ import { tavily } from "@tavily/core";
94
196
  async function tavilySearch(options) {
95
197
  const {
96
198
  query,
@@ -102,27 +204,1104 @@ async function tavilySearch(options) {
102
204
  if (!apiKey) {
103
205
  throw new Error("Tavily API key is required");
104
206
  }
105
- if (!tvly) {
106
- tvly = tavily({
207
+ try {
208
+ const tvly = tavily({
107
209
  apiKey
108
210
  });
211
+ const params = {
212
+ topic: categories,
213
+ timeRange,
214
+ maxResults: limit
215
+ };
216
+ const res = await tvly.search(query, params);
217
+ const results = res.results.map((item) => ({
218
+ title: item.title,
219
+ url: item.url,
220
+ snippet: item.content,
221
+ engine: "tavily"
222
+ }));
223
+ return {
224
+ results,
225
+ success: true
226
+ };
227
+ } catch (error) {
228
+ const msg = error instanceof Error ? error.message : "Tavily search error.";
229
+ process.stdout.write(msg);
230
+ throw error;
109
231
  }
110
- const params = {
111
- topic: categories,
112
- timeRange,
113
- maxResults: limit
114
- };
115
- const res = await tvly.search(query, params);
116
- const results = res.results.map((item) => ({
117
- title: item.title,
118
- url: item.url,
119
- snippet: item.content
120
- }));
232
+ }
233
+
234
+ // src/libs/browser/types.ts
235
+ import { Page } from "puppeteer-core";
236
+
237
+ // src/libs/browser/finder.ts
238
+ import * as fs from "fs";
239
+ import * as path from "path";
240
+ import * as os from "os";
241
+ import { defaultLogger } from "@agent-infra/logger";
242
/**
 * Locates an installed Chromium-family browser by probing a fixed list of
 * well-known executable paths for the current platform.
 */
var BrowserFinder = class {
  /**
   * Logger instance for diagnostic output
   */
  logger;
  /**
   * Creates a new BrowserFinder instance
   * @param {Logger} [logger] - Optional custom logger; falls back to the default logger
   */
  constructor(logger3) {
    this.logger = logger3 ?? defaultLogger;
  }
  /**
   * Getter that returns the list of supported browsers with their platform-specific paths.
   * The list is rebuilt on every access so it reflects the current environment.
   * @returns {Browser[]} Array of browser configurations
   * @private
   */
  get browsers() {
    const HOME_DIR = os.homedir();
    // LOCALAPPDATA is normally only defined on Windows. Fall back to the
    // conventional location so the template strings never embed "undefined".
    const LOCAL_APP_DATA = process.env.LOCALAPPDATA ?? path.join(HOME_DIR, "AppData", "Local");
    return [
      {
        name: "Chromium",
        executable: {
          win32: "C:\\Program Files\\Chromium\\Application\\chrome.exe",
          darwin: "/Applications/Chromium.app/Contents/MacOS/Chromium",
          linux: "/usr/bin/chromium"
        },
        userDataDir: {
          win32: `${LOCAL_APP_DATA}\\Chromium\\User Data`,
          darwin: `${HOME_DIR}/Library/Application Support/Chromium`,
          linux: `${HOME_DIR}/.config/chromium`
        }
      },
      {
        name: "Google Chrome",
        executable: {
          win32: "C:\\Program Files\\Google\\Chrome\\Application\\chrome.exe",
          darwin: "/Applications/Google Chrome.app/Contents/MacOS/Google Chrome",
          linux: "/usr/bin/google-chrome"
        },
        userDataDir: {
          win32: `${LOCAL_APP_DATA}\\Google\\Chrome\\User Data`,
          darwin: `${HOME_DIR}/Library/Application Support/Google/Chrome`,
          linux: `${HOME_DIR}/.config/google-chrome`
        }
      },
      {
        name: "Google Chrome Canary",
        executable: {
          win32: "C:\\Program Files\\Google\\Chrome Canary\\Application\\chrome.exe",
          darwin: "/Applications/Google Chrome Canary.app/Contents/MacOS/Google Chrome Canary",
          linux: "/usr/bin/google-chrome-canary"
        },
        userDataDir: {
          win32: `${LOCAL_APP_DATA}\\Google\\Chrome Canary\\User Data`,
          darwin: `${HOME_DIR}/Library/Application Support/Google/Chrome Canary`,
          linux: `${HOME_DIR}/.config/google-chrome-canary`
        }
      }
    ];
  }
  /**
   * Find a specific browser or the first available browser
   * @param {string} [name] - Optional browser name to find; when omitted (or empty)
   *   the first browser whose executable exists is returned
   * @returns {{ executable: string; userDataDir: string }} Browser executable and user data paths
   * @throws {Error} If no supported browser is found or the platform is unsupported
   */
  findBrowser(name) {
    const platform = process.platform;
    this.logger.info("Finding browser on platform:", platform);
    if (platform !== "darwin" && platform !== "win32" && platform !== "linux") {
      const error = new Error(`Unsupported platform: ${platform}`);
      this.logger.error(error.message);
      throw error;
    }
    // Read the getter once; every access rebuilds the whole list.
    const candidates = this.browsers;
    const browser = candidates.find(
      (b) => (!name || b.name === name) && fs.existsSync(b.executable[platform])
    );
    this.logger.log("browser", browser);
    if (!browser) {
      // List the browsers this finder actually probes so the advice is actionable.
      const supported = candidates.map((b) => b.name).join(", ");
      const error = name ? new Error(`Cannot find browser: ${name}`) : new Error(
        `Cannot find a supported browser on your system. Please install one of: ${supported}.`
      );
      this.logger.error(error.message);
      throw error;
    }
    const result = {
      executable: browser.executable[platform],
      userDataDir: browser.userDataDir[platform]
    };
    this.logger.success(`Found browser: ${browser.name}`);
    this.logger.info("Browser details:", result);
    return result;
  }
  /**
   * Get browser profiles for a specific browser.
   * Reads the "Local State" JSON file in the user data directory and maps each
   * entry of `profile.info_cache` to a display name and profile path.
   * @param {string} [browserName] - Optional browser name to get profiles for
   * @returns {Array<{ displayName: string; path: string }>} Array of profile objects;
   *   empty when the file is missing, unreadable, or not shaped as expected
   * @throws {Error} If the browser itself cannot be found (see findBrowser)
   */
  getBrowserProfiles(browserName) {
    const browser = this.findBrowser(browserName);
    try {
      const localState = JSON.parse(
        fs.readFileSync(path.join(browser.userDataDir, "Local State"), "utf8")
      );
      const profileInfo = localState.profile.info_cache;
      return Object.entries(profileInfo).map(
        ([profileName, info]) => ({
          displayName: info.name,
          path: path.join(browser.userDataDir, profileName)
        })
      );
    } catch (error) {
      // Best effort: still return an empty list, but leave a trace of the cause.
      this.logger.warn("Failed to read browser profiles:", error);
      return [];
    }
  }
  /**
   * Legacy method for backwards compatibility
   * Finds Chrome browser executable path
   * @deprecated Use findBrowser instead
   * @returns {string | null} Chrome executable path or null if not found
   */
  findChrome() {
    try {
      const { executable } = this.findBrowser("Google Chrome");
      return executable;
    } catch {
      return null;
    }
  }
};
375
+
376
+ // src/libs/browser/base.ts
377
+ import { defaultLogger as defaultLogger2 } from "@agent-infra/logger";
378
/**
 * Shared behaviour for browser wrappers: holds the Puppeteer browser instance,
 * tracks the most recently created page, and offers page helpers.
 * Subclasses are expected to assign `this.browser` when they launch or connect.
 */
var BaseBrowser = class {
  /**
   * The underlying Puppeteer browser instance
   * @protected
   */
  browser = null;
  /**
   * Logger instance for browser-related logging
   * @protected
   */
  logger;
  /**
   * Reference to the currently active browser page
   * @protected
   */
  activePage = null;
  /**
   * Creates an instance of BaseBrowser
   * @param {BaseBrowserOptions} [options] - Configuration options
   */
  constructor(options) {
    this.logger = options?.logger ?? defaultLogger2;
    this.logger.info("Browser Options:", options);
  }
  /**
   * Get the underlying Puppeteer browser instance
   * @throws Error if browser is not launched
   * @returns {puppeteer.Browser} Puppeteer browser instance
   */
  getBrowser() {
    if (!this.browser) {
      throw new Error("Browser not launched");
    }
    return this.browser;
  }
  /**
   * Sets up listeners for browser page events
   * Tracks page creation and updates active page reference
   * @protected
   */
  async setupPageListener() {
    if (!this.browser) return;
    this.browser.on("targetcreated", async (target) => {
      // The emitter ignores the promise returned by this handler, so any
      // rejection must be handled here or it becomes an unhandled rejection.
      try {
        const page = await target.page();
        if (!page) return;
        this.logger.info("New page created:", page.url());
        this.activePage = page;
        const forgetPage = () => {
          if (this.activePage === page) {
            this.activePage = null;
          }
        };
        page.once("close", forgetPage);
        page.once("error", forgetPage);
      } catch (error) {
        this.logger.warn("Failed to track newly created page:", error);
      }
    });
  }
  /**
   * Closes the browser instance and cleans up resources
   * @returns {Promise<void>} Promise that resolves when browser is closed
   * @throws {Error} If browser fails to close properly
   */
  async close() {
    this.logger.info("Closing browser");
    try {
      await this.browser?.close();
      this.browser = null;
      this.logger.success("Browser closed successfully");
    } catch (error) {
      this.logger.error("Failed to close browser:", error);
      throw error;
    }
  }
  /**
   * Creates a new page, navigates to the specified URL, executes a function in the page context, and returns the result
   * This method is inspired and modified from https://github.com/egoist/local-web-search/blob/04608ed09aa103e2fff6402c72ca12edfb692d19/src/browser.ts#L74
   *
   * The page and the window handle are always released, whether the evaluation
   * succeeds or fails; cleanup failures are logged as warnings so they never
   * replace the result or the original error.
   * @template T - Type of parameters passed to the page function
   * @template R - Return type of the page function
   * @param {EvaluateOnNewPageOptions<T, R>} options - Configuration options for the page evaluation;
   *   `pageFunctionParams` defaults to an empty list when omitted
   * @returns {Promise<R | null>} Promise resolving to the result of the page function or null
   * @throws {Error} If the browser is not launched, or page creation or evaluation fails
   */
  async evaluateOnNewPage(options) {
    const {
      url: url2,
      pageFunction,
      pageFunctionParams = [],
      beforePageLoad,
      afterPageLoad,
      beforeSendResult,
      waitForOptions
    } = options;
    // getBrowser() gives the same "Browser not launched" error as the other helpers.
    const page = await this.getBrowser().newPage();
    let windowHandle = null;
    try {
      await beforePageLoad?.(page);
      await page.goto(url2, {
        waitUntil: "networkidle2",
        ...waitForOptions
      });
      await afterPageLoad?.(page);
      windowHandle = await page.evaluateHandle(() => window);
      const result = await page.evaluate(
        pageFunction,
        windowHandle,
        ...pageFunctionParams
      );
      await beforeSendResult?.(page, result);
      return result;
    } finally {
      try {
        await windowHandle?.dispose();
      } catch (error) {
        this.logger.warn("Failed to dispose window handle:", error);
      }
      try {
        await page.close();
      } catch (error) {
        this.logger.warn("Failed to close page:", error);
      }
    }
  }
  /**
   * Creates a new browser page
   * @returns {Promise<Page>} Promise resolving to the newly created page
   * @throws {Error} If browser is not launched or page creation fails
   */
  async createPage() {
    if (!this.browser) {
      this.logger.error("No active browser");
      throw new Error("Browser not launched");
    }
    const page = await this.browser.newPage();
    return page;
  }
  /**
   * Gets the currently active page or finds an active page if none is currently tracked
   * If no active pages exist, creates a new page
   * @returns {Promise<Page>} Promise resolving to the active page
   * @throws {Error} If browser is not launched or no active page can be found/created
   */
  async getActivePage() {
    if (!this.browser) {
      throw new Error("Browser not launched");
    }
    if (this.activePage) {
      try {
        // Cheap round trip to confirm the tracked page still responds.
        await this.activePage.evaluate(() => document.readyState);
        return this.activePage;
      } catch (e) {
        this.logger.warn("Active page no longer available:", e);
        this.activePage = null;
      }
    }
    const pages = await this.browser.pages();
    if (pages.length === 0) {
      this.activePage = await this.createPage();
      return this.activePage;
    }
    // Walk from the last page backwards and keep the first one that responds.
    for (let i = pages.length - 1; i >= 0; i--) {
      const page = pages[i];
      try {
        await page.evaluate(() => document.readyState);
        this.activePage = page;
        return page;
      } catch (e) {
        continue;
      }
    }
    throw new Error("No active page found");
  }
};
547
+
548
+ // src/libs/browser/local.ts
549
+ import * as puppeteer from "puppeteer-core";
550
/**
 * Browser wrapper that starts a browser installed on the local machine
 * through Puppeteer.
 */
var LocalBrowser = class extends BaseBrowser {
  /**
   * Finder used to locate an installed browser when the caller does not
   * provide an executable path
   * @private
   */
  browserFinder = new BrowserFinder();
  /**
   * Starts a local browser process and stores it on `this.browser`.
   * When `options.executablePath` is not given, the first browser reported by
   * the finder is used.
   * @param {LaunchOptions} options - Launch configuration (executable path, viewport,
   *   headless flag, proxy, profile path, timeout)
   * @returns {Promise<void>} Resolves once the browser is running and the page listener is installed
   * @throws {Error} If no browser can be located or Puppeteer fails to launch it
   */
  async launch(options = {}) {
    this.logger.info("Launching browser with options:", options);

    let executablePath = options?.executablePath;
    if (!executablePath) {
      executablePath = this.browserFinder.findBrowser().executable;
    }
    this.logger.info("Using executable path:", executablePath);

    const width = options?.defaultViewport?.width ?? 1280;
    const height = options?.defaultViewport?.height ?? 800;

    const args = [
      "--no-sandbox",
      "--mute-audio",
      "--disable-gpu",
      "--disable-http2",
      "--disable-blink-features=AutomationControlled",
      "--disable-infobars",
      "--disable-background-timer-throttling",
      "--disable-popup-blocking",
      "--disable-backgrounding-occluded-windows",
      "--disable-renderer-backgrounding",
      "--disable-window-activation",
      "--disable-focus-on-load",
      // disable default browser check
      "--no-default-browser-check",
      // disable CORS
      "--disable-web-security",
      "--disable-features=IsolateOrigins,site-per-process",
      "--disable-site-isolation-trials",
      // the window is 90px taller than the viewport
      `--window-size=${width},${height + 90}`
    ];
    // Optional flags are appended only when the caller supplied a value.
    if (options?.proxy) {
      args.push(`--proxy-server=${options.proxy}`);
    }
    if (options?.profilePath) {
      args.push(`--profile-directory=${options.profilePath}`);
    }

    const launchConfig = {
      executablePath,
      headless: options?.headless ?? false,
      defaultViewport: { width, height },
      args,
      ignoreDefaultArgs: ["--enable-automation"],
      timeout: options.timeout ?? 0,
      downloadBehavior: { policy: "deny" }
    };
    this.logger.info("Launch options:", launchConfig);

    try {
      this.browser = await puppeteer.launch(launchConfig);
      await this.setupPageListener();
      this.logger.success("Browser launched successfully");
    } catch (launchError) {
      this.logger.error("Failed to launch browser:", launchError);
      throw launchError;
    }
  }
};
616
+
617
+ // src/libs/browser/remote.ts
618
+ import * as puppeteer2 from "puppeteer-core";
619
+
620
+ // src/libs/browser-search/readability.ts
621
+ var READABILITY_SCRIPT = 'function q(t,e){if(e&&e.documentElement)t=e,e=arguments[2];else if(!t||!t.documentElement)throw new Error("First argument to Readability constructor should be a document object.");if(e=e||{},this._doc=t,this._docJSDOMParser=this._doc.firstChild.__JSDOMParser__,this._articleTitle=null,this._articleByline=null,this._articleDir=null,this._articleSiteName=null,this._attempts=[],this._debug=!!e.debug,this._maxElemsToParse=e.maxElemsToParse||this.DEFAULT_MAX_ELEMS_TO_PARSE,this._nbTopCandidates=e.nbTopCandidates||this.DEFAULT_N_TOP_CANDIDATES,this._charThreshold=e.charThreshold||this.DEFAULT_CHAR_THRESHOLD,this._classesToPreserve=this.CLASSES_TO_PRESERVE.concat(e.classesToPreserve||[]),this._keepClasses=!!e.keepClasses,this._serializer=e.serializer||function(i){return i.innerHTML},this._disableJSONLD=!!e.disableJSONLD,this._allowedVideoRegex=e.allowedVideoRegex||this.REGEXPS.videos,this._flags=this.FLAG_STRIP_UNLIKELYS|this.FLAG_WEIGHT_CLASSES|this.FLAG_CLEAN_CONDITIONALLY,this._debug){let i=function(r){if(r.nodeType==r.TEXT_NODE)return`${r.nodeName} ("${r.textContent}")`;let l=Array.from(r.attributes||[],function(a){return`${a.name}="${a.value}"`}).join(" ");return`<${r.localName} ${l}>`};this.log=function(){if(typeof console!="undefined"){let l=Array.from(arguments,a=>a&&a.nodeType==this.ELEMENT_NODE?i(a):a);l.unshift("Reader: (Readability)"),console.log.apply(console,l)}else if(typeof dump!="undefined"){var r=Array.prototype.map.call(arguments,function(l){return l&&l.nodeName?i(l):l}).join(" ");dump("Reader: (Readability) "+r+`\n`)}}}else 
this.log=function(){}}q.prototype={FLAG_STRIP_UNLIKELYS:1,FLAG_WEIGHT_CLASSES:2,FLAG_CLEAN_CONDITIONALLY:4,ELEMENT_NODE:1,TEXT_NODE:3,DEFAULT_MAX_ELEMS_TO_PARSE:0,DEFAULT_N_TOP_CANDIDATES:5,DEFAULT_TAGS_TO_SCORE:"section,h2,h3,h4,h5,h6,p,td,pre".toUpperCase().split(","),DEFAULT_CHAR_THRESHOLD:500,REGEXPS:{unlikelyCandidates:/-ad-|ai2html|banner|breadcrumbs|combx|comment|community|cover-wrap|disqus|extra|footer|gdpr|header|legends|menu|related|remark|replies|rss|shoutbox|sidebar|skyscraper|social|sponsor|supplemental|ad-break|agegate|pagination|pager|popup|yom-remote/i,okMaybeItsACandidate:/and|article|body|column|content|main|shadow/i,positive:/article|body|content|entry|hentry|h-entry|main|page|pagination|post|text|blog|story/i,negative:/-ad-|hidden|^hid$| hid$| hid |^hid |banner|combx|comment|com-|contact|foot|footer|footnote|gdpr|masthead|media|meta|outbrain|promo|related|scroll|share|shoutbox|sidebar|skyscraper|sponsor|shopping|tags|tool|widget/i,extraneous:/print|archive|comment|discuss|e[\\-]?mail|share|reply|all|login|sign|single|utility/i,byline:/byline|author|dateline|writtenby|p-author/i,replaceFonts:/<(\\/?)font[^>]*>/gi,normalize:/\\s{2,}/g,videos:/\\/\\/(www\\.)?((dailymotion|youtube|youtube-nocookie|player\\.vimeo|v\\.qq)\\.com|(archive|upload\\.wikimedia)\\.org|player\\.twitch\\.tv)/i,shareElements:/(\\b|_)(share|sharedaddy)(\\b|_)/i,nextLink:/(next|weiter|continue|>([^\\|]|$)|\xBB([^\\|]|$))/i,prevLink:/(prev|earl|old|new|<|\xAB)/i,tokenize:/\\W+/g,whitespace:/^\\s*$/,hasContent:/\\S$/,hashUrl:/^#.+/,srcsetUrl:/(\\S+)(\\s+[\\d.]+[xw])?(\\s*(?:,|$))/g,b64DataUrl:/^data:\\s*([^\\s;,]+)\\s*;\\s*base64\\s*,/i,commas:/\\u002C|\\u060C|\\uFE50|\\uFE10|\\uFE11|\\u2E41|\\u2E34|\\u2E32|\\uFF0C/g,jsonLdArticleTypes:/^Article|AdvertiserContentArticle|NewsArticle|AnalysisNewsArticle|AskPublicNewsArticle|BackgroundNewsArticle|OpinionNewsArticle|ReportageNewsArticle|ReviewNewsArticle|Report|SatiricalArticle|ScholarlyArticle|MedicalScholarlyArticle|SocialMediaPostin
g|BlogPosting|LiveBlogPosting|DiscussionForumPosting|TechArticle|APIReference$/},UNLIKELY_ROLES:["menu","menubar","complementary","navigation","alert","alertdialog","dialog"],DIV_TO_P_ELEMS:new Set(["BLOCKQUOTE","DL","DIV","IMG","OL","P","PRE","TABLE","UL"]),ALTER_TO_DIV_EXCEPTIONS:["DIV","ARTICLE","SECTION","P"],PRESENTATIONAL_ATTRIBUTES:["align","background","bgcolor","border","cellpadding","cellspacing","frame","hspace","rules","style","valign","vspace"],DEPRECATED_SIZE_ATTRIBUTE_ELEMS:["TABLE","TH","TD","HR","PRE"],PHRASING_ELEMS:["ABBR","AUDIO","B","BDO","BR","BUTTON","CITE","CODE","DATA","DATALIST","DFN","EM","EMBED","I","IMG","INPUT","KBD","LABEL","MARK","MATH","METER","NOSCRIPT","OBJECT","OUTPUT","PROGRESS","Q","RUBY","SAMP","SCRIPT","SELECT","SMALL","SPAN","STRONG","SUB","SUP","TEXTAREA","TIME","VAR","WBR"],CLASSES_TO_PRESERVE:["page"],HTML_ESCAPE_MAP:{lt:"<",gt:">",amp:"&",quot:\'"\',apos:"\'"},_postProcessContent:function(t){this._fixRelativeUris(t),this._simplifyNestedElements(t),this._keepClasses||this._cleanClasses(t)},_removeNodes:function(t,e){if(this._docJSDOMParser&&t._isLiveNodeList)throw new Error("Do not pass live node lists to _removeNodes");for(var i=t.length-1;i>=0;i--){var r=t[i],l=r.parentNode;l&&(!e||e.call(this,r,i,t))&&l.removeChild(r)}},_replaceNodeTags:function(t,e){if(this._docJSDOMParser&&t._isLiveNodeList)throw new Error("Do not pass live node lists to _replaceNodeTags");for(let i of t)this._setNodeTag(i,e)},_forEachNode:function(t,e){Array.prototype.forEach.call(t,e,this)},_findNode:function(t,e){return Array.prototype.find.call(t,e,this)},_someNode:function(t,e){return Array.prototype.some.call(t,e,this)},_everyNode:function(t,e){return Array.prototype.every.call(t,e,this)},_concatNodeLists:function(){var t=Array.prototype.slice,e=t.call(arguments),i=e.map(function(r){return t.call(r)});return Array.prototype.concat.apply([],i)},_getAllNodesWithTag:function(t,e){return 
t.querySelectorAll?t.querySelectorAll(e.join(",")):[].concat.apply([],e.map(function(i){var r=t.getElementsByTagName(i);return Array.isArray(r)?r:Array.from(r)}))},_cleanClasses:function(t){var e=this._classesToPreserve,i=(t.getAttribute("class")||"").split(/\\s+/).filter(function(r){return e.indexOf(r)!=-1}).join(" ");for(i?t.setAttribute("class",i):t.removeAttribute("class"),t=t.firstElementChild;t;t=t.nextElementSibling)this._cleanClasses(t)},_fixRelativeUris:function(t){var e=this._doc.baseURI,i=this._doc.documentURI;function r(s){if(e==i&&s.charAt(0)=="#")return s;try{return new URL(s,e).href}catch(h){}return s}var l=this._getAllNodesWithTag(t,["a"]);this._forEachNode(l,function(s){var h=s.getAttribute("href");if(h)if(h.indexOf("javascript:")===0)if(s.childNodes.length===1&&s.childNodes[0].nodeType===this.TEXT_NODE){var c=this._doc.createTextNode(s.textContent);s.parentNode.replaceChild(c,s)}else{for(var n=this._doc.createElement("span");s.firstChild;)n.appendChild(s.firstChild);s.parentNode.replaceChild(n,s)}else s.setAttribute("href",r(h))});var a=this._getAllNodesWithTag(t,["img","picture","figure","video","audio","source"]);this._forEachNode(a,function(s){var h=s.getAttribute("src"),c=s.getAttribute("poster"),n=s.getAttribute("srcset");if(h&&s.setAttribute("src",r(h)),c&&s.setAttribute("poster",r(c)),n){var u=n.replace(this.REGEXPS.srcsetUrl,function(m,b,N,v){return r(b)+(N||"")+v});s.setAttribute("srcset",u)}})},_simplifyNestedElements:function(t){for(var e=t;e;){if(e.parentNode&&["DIV","SECTION"].includes(e.tagName)&&!(e.id&&e.id.startsWith("readability"))){if(this._isElementWithoutContent(e)){e=this._removeAndGetNext(e);continue}else if(this._hasSingleTagInsideElement(e,"DIV")||this._hasSingleTagInsideElement(e,"SECTION")){for(var i=e.children[0],r=0;r<e.attributes.length;r++)i.setAttribute(e.attributes[r].name,e.attributes[r].value);e.parentNode.replaceChild(i,e),e=i;continue}}e=this._getNextNode(e)}},_getArticleTitle:function(){var 
t=this._doc,e="",i="";try{e=i=t.title.trim(),typeof e!="string"&&(e=i=this._getInnerText(t.getElementsByTagName("title")[0]))}catch(u){}var r=!1;function l(u){return u.split(/\\s+/).length}if(/ [\\|\\-\\\\\\/>\xBB] /.test(e))r=/ [\\\\\\/>\xBB] /.test(e),e=i.replace(/(.*)[\\|\\-\\\\\\/>\xBB] .*/gi,"$1"),l(e)<3&&(e=i.replace(/[^\\|\\-\\\\\\/>\xBB]*[\\|\\-\\\\\\/>\xBB](.*)/gi,"$1"));else if(e.indexOf(": ")!==-1){var a=this._concatNodeLists(t.getElementsByTagName("h1"),t.getElementsByTagName("h2")),s=e.trim(),h=this._someNode(a,function(u){return u.textContent.trim()===s});h||(e=i.substring(i.lastIndexOf(":")+1),l(e)<3?e=i.substring(i.indexOf(":")+1):l(i.substr(0,i.indexOf(":")))>5&&(e=i))}else if(e.length>150||e.length<15){var c=t.getElementsByTagName("h1");c.length===1&&(e=this._getInnerText(c[0]))}e=e.trim().replace(this.REGEXPS.normalize," ");var n=l(e);return n<=4&&(!r||n!=l(i.replace(/[\\|\\-\\\\\\/>\xBB]+/g,""))-1)&&(e=i),e},_prepDocument:function(){var t=this._doc;this._removeNodes(this._getAllNodesWithTag(t,["style"])),t.body&&this._replaceBrs(t.body),this._replaceNodeTags(this._getAllNodesWithTag(t,["font"]),"SPAN")},_nextNode:function(t){for(var e=t;e&&e.nodeType!=this.ELEMENT_NODE&&this.REGEXPS.whitespace.test(e.textContent);)e=e.nextSibling;return e},_replaceBrs:function(t){this._forEachNode(this._getAllNodesWithTag(t,["br"]),function(e){for(var i=e.nextSibling,r=!1;(i=this._nextNode(i))&&i.tagName=="BR";){r=!0;var l=i.nextSibling;i.parentNode.removeChild(i),i=l}if(r){var a=this._doc.createElement("p");for(e.parentNode.replaceChild(a,e),i=a.nextSibling;i;){if(i.tagName=="BR"){var s=this._nextNode(i.nextSibling);if(s&&s.tagName=="BR")break}if(!this._isPhrasingContent(i))break;var h=i.nextSibling;a.appendChild(i),i=h}for(;a.lastChild&&this._isWhitespace(a.lastChild);)a.removeChild(a.lastChild);a.parentNode.tagName==="P"&&this._setNodeTag(a.parentNode,"DIV")}})},_setNodeTag:function(t,e){if(this.log("_setNodeTag",t,e),this._docJSDOMParser)return 
t.localName=e.toLowerCase(),t.tagName=e.toUpperCase(),t;for(var i=t.ownerDocument.createElement(e);t.firstChild;)i.appendChild(t.firstChild);t.parentNode.replaceChild(i,t),t.readability&&(i.readability=t.readability);for(var r=0;r<t.attributes.length;r++)try{i.setAttribute(t.attributes[r].name,t.attributes[r].value)}catch(l){}return i},_prepArticle:function(t){this._cleanStyles(t),this._markDataTables(t),this._fixLazyImages(t),this._cleanConditionally(t,"form"),this._cleanConditionally(t,"fieldset"),this._clean(t,"object"),this._clean(t,"embed"),this._clean(t,"footer"),this._clean(t,"link"),this._clean(t,"aside");var e=this.DEFAULT_CHAR_THRESHOLD;this._forEachNode(t.children,function(i){this._cleanMatchedNodes(i,function(r,l){return this.REGEXPS.shareElements.test(l)&&r.textContent.length<e})}),this._clean(t,"iframe"),this._clean(t,"input"),this._clean(t,"textarea"),this._clean(t,"select"),this._clean(t,"button"),this._cleanHeaders(t),this._cleanConditionally(t,"table"),this._cleanConditionally(t,"ul"),this._cleanConditionally(t,"div"),this._replaceNodeTags(this._getAllNodesWithTag(t,["h1"]),"h2"),this._removeNodes(this._getAllNodesWithTag(t,["p"]),function(i){var r=i.getElementsByTagName("img").length,l=i.getElementsByTagName("embed").length,a=i.getElementsByTagName("object").length,s=i.getElementsByTagName("iframe").length,h=r+l+a+s;return h===0&&!this._getInnerText(i,!1)}),this._forEachNode(this._getAllNodesWithTag(t,["br"]),function(i){var r=this._nextNode(i.nextSibling);r&&r.tagName=="P"&&i.parentNode.removeChild(i)}),this._forEachNode(this._getAllNodesWithTag(t,["table"]),function(i){var r=this._hasSingleTagInsideElement(i,"TBODY")?i.firstElementChild:i;if(this._hasSingleTagInsideElement(r,"TR")){var l=r.firstElementChild;if(this._hasSingleTagInsideElement(l,"TD")){var 
a=l.firstElementChild;a=this._setNodeTag(a,this._everyNode(a.childNodes,this._isPhrasingContent)?"P":"DIV"),i.parentNode.replaceChild(a,i)}}})},_initializeNode:function(t){switch(t.readability={contentScore:0},t.tagName){case"DIV":t.readability.contentScore+=5;break;case"PRE":case"TD":case"BLOCKQUOTE":t.readability.contentScore+=3;break;case"ADDRESS":case"OL":case"UL":case"DL":case"DD":case"DT":case"LI":case"FORM":t.readability.contentScore-=3;break;case"H1":case"H2":case"H3":case"H4":case"H5":case"H6":case"TH":t.readability.contentScore-=5;break}t.readability.contentScore+=this._getClassWeight(t)},_removeAndGetNext:function(t){var e=this._getNextNode(t,!0);return t.parentNode.removeChild(t),e},_getNextNode:function(t,e){if(!e&&t.firstElementChild)return t.firstElementChild;if(t.nextElementSibling)return t.nextElementSibling;do t=t.parentNode;while(t&&!t.nextElementSibling);return t&&t.nextElementSibling},_textSimilarity:function(t,e){var i=t.toLowerCase().split(this.REGEXPS.tokenize).filter(Boolean),r=e.toLowerCase().split(this.REGEXPS.tokenize).filter(Boolean);if(!i.length||!r.length)return 0;var l=r.filter(s=>!i.includes(s)),a=l.join(" ").length/r.join(" ").length;return 1-a},_checkByline:function(t,e){if(this._articleByline)return!1;if(t.getAttribute!==void 0)var i=t.getAttribute("rel"),r=t.getAttribute("itemprop");return(i==="author"||r&&r.indexOf("author")!==-1||this.REGEXPS.byline.test(e))&&this._isValidByline(t.textContent)?(this._articleByline=t.textContent.trim(),!0):!1},_getNodeAncestors:function(t,e){e=e||0;for(var i=0,r=[];t.parentNode&&(r.push(t.parentNode),!(e&&++i===e));)t=t.parentNode;return r},_grabArticle:function(t){this.log("**** grabArticle ****");var e=this._doc,i=t!==null;if(t=t||this._doc.body,!t)return this.log("No body found in document. 
Abort."),null;for(var r=t.innerHTML;;){this.log("Starting grabArticle loop");var l=this._flagIsActive(this.FLAG_STRIP_UNLIKELYS),a=[],s=this._doc.documentElement;let J=!0;for(;s;){s.tagName==="HTML"&&(this._articleLang=s.getAttribute("lang"));var h=s.className+" "+s.id;if(!this._isProbablyVisible(s)){this.log("Removing hidden node - "+h),s=this._removeAndGetNext(s);continue}if(s.getAttribute("aria-modal")=="true"&&s.getAttribute("role")=="dialog"){s=this._removeAndGetNext(s);continue}if(this._checkByline(s,h)){s=this._removeAndGetNext(s);continue}if(J&&this._headerDuplicatesTitle(s)){this.log("Removing header: ",s.textContent.trim(),this._articleTitle.trim()),J=!1,s=this._removeAndGetNext(s);continue}if(l){if(this.REGEXPS.unlikelyCandidates.test(h)&&!this.REGEXPS.okMaybeItsACandidate.test(h)&&!this._hasAncestorTag(s,"table")&&!this._hasAncestorTag(s,"code")&&s.tagName!=="BODY"&&s.tagName!=="A"){this.log("Removing unlikely candidate - "+h),s=this._removeAndGetNext(s);continue}if(this.UNLIKELY_ROLES.includes(s.getAttribute("role"))){this.log("Removing content with role "+s.getAttribute("role")+" - "+h),s=this._removeAndGetNext(s);continue}}if((s.tagName==="DIV"||s.tagName==="SECTION"||s.tagName==="HEADER"||s.tagName==="H1"||s.tagName==="H2"||s.tagName==="H3"||s.tagName==="H4"||s.tagName==="H5"||s.tagName==="H6")&&this._isElementWithoutContent(s)){s=this._removeAndGetNext(s);continue}if(this.DEFAULT_TAGS_TO_SCORE.indexOf(s.tagName)!==-1&&a.push(s),s.tagName==="DIV"){for(var c=null,n=s.firstChild;n;){var u=n.nextSibling;if(this._isPhrasingContent(n))c!==null?c.appendChild(n):this._isWhitespace(n)||(c=e.createElement("p"),s.replaceChild(c,n),c.appendChild(n));else if(c!==null){for(;c.lastChild&&this._isWhitespace(c.lastChild);)c.removeChild(c.lastChild);c=null}n=u}if(this._hasSingleTagInsideElement(s,"P")&&this._getLinkDensity(s)<.25){var m=s.children[0];s.parentNode.replaceChild(m,s),s=m,a.push(s)}else 
this._hasChildBlockElement(s)||(s=this._setNodeTag(s,"P"),a.push(s))}s=this._getNextNode(s)}var b=[];this._forEachNode(a,function(A){if(!(!A.parentNode||typeof A.parentNode.tagName=="undefined")){var T=this._getInnerText(A);if(!(T.length<25)){var K=this._getNodeAncestors(A,5);if(K.length!==0){var C=0;C+=1,C+=T.split(this.REGEXPS.commas).length,C+=Math.min(Math.floor(T.length/100),3),this._forEachNode(K,function(S,F){if(!(!S.tagName||!S.parentNode||typeof S.parentNode.tagName=="undefined")){if(typeof S.readability=="undefined"&&(this._initializeNode(S),b.push(S)),F===0)var X=1;else F===1?X=2:X=F*3;S.readability.contentScore+=C/X}})}}}});for(var N=[],v=0,y=b.length;v<y;v+=1){var E=b[v],d=E.readability.contentScore*(1-this._getLinkDensity(E));E.readability.contentScore=d,this.log("Candidate:",E,"with score "+d);for(var p=0;p<this._nbTopCandidates;p++){var x=N[p];if(!x||d>x.readability.contentScore){N.splice(p,0,E),N.length>this._nbTopCandidates&&N.pop();break}}}var o=N[0]||null,L=!1,g;if(o===null||o.tagName==="BODY"){for(o=e.createElement("DIV"),L=!0;t.firstChild;)this.log("Moving child out:",t.firstChild),o.appendChild(t.firstChild);t.appendChild(o),this._initializeNode(o)}else if(o){for(var I=[],P=1;P<N.length;P++)N[P].readability.contentScore/o.readability.contentScore>=.75&&I.push(this._getNodeAncestors(N[P]));var O=3;if(I.length>=O)for(g=o.parentNode;g.tagName!=="BODY";){for(var G=0,H=0;H<I.length&&G<O;H++)G+=Number(I[H].includes(g));if(G>=O){o=g;break}g=g.parentNode}o.readability||this._initializeNode(o),g=o.parentNode;for(var M=o.readability.contentScore,Q=M/3;g.tagName!=="BODY";){if(!g.readability){g=g.parentNode;continue}var V=g.readability.contentScore;if(V<Q)break;if(V>M){o=g;break}M=g.readability.contentScore,g=g.parentNode}for(g=o.parentNode;g.tagName!="BODY"&&g.children.length==1;)o=g,g=o.parentNode;o.readability||this._initializeNode(o)}var _=e.createElement("DIV");i&&(_.id="readability-content");var 
Z=Math.max(10,o.readability.contentScore*.2);g=o.parentNode;for(var U=g.children,w=0,j=U.length;w<j;w++){var f=U[w],R=!1;if(this.log("Looking at sibling node:",f,f.readability?"with score "+f.readability.contentScore:""),this.log("Sibling has score",f.readability?f.readability.contentScore:"Unknown"),f===o)R=!0;else{var $=0;if(f.className===o.className&&o.className!==""&&($+=o.readability.contentScore*.2),f.readability&&f.readability.contentScore+$>=Z)R=!0;else if(f.nodeName==="P"){var Y=this._getLinkDensity(f),z=this._getInnerText(f),k=z.length;(k>80&&Y<.25||k<80&&k>0&&Y===0&&z.search(/\\.( |$)/)!==-1)&&(R=!0)}}R&&(this.log("Appending node:",f),this.ALTER_TO_DIV_EXCEPTIONS.indexOf(f.nodeName)===-1&&(this.log("Altering sibling:",f,"to div."),f=this._setNodeTag(f,"DIV")),_.appendChild(f),U=g.children,w-=1,j-=1)}if(this._debug&&this.log("Article content pre-prep: "+_.innerHTML),this._prepArticle(_),this._debug&&this.log("Article content post-prep: "+_.innerHTML),L)o.id="readability-page-1",o.className="page";else{var B=e.createElement("DIV");for(B.id="readability-page-1",B.className="page";_.firstChild;)B.appendChild(_.firstChild);_.appendChild(B)}this._debug&&this.log("Article content after paging: "+_.innerHTML);var W=!0,D=this._getInnerText(_,!0).length;if(D<this._charThreshold)if(W=!1,t.innerHTML=r,this._flagIsActive(this.FLAG_STRIP_UNLIKELYS))this._removeFlag(this.FLAG_STRIP_UNLIKELYS),this._attempts.push({articleContent:_,textLength:D});else if(this._flagIsActive(this.FLAG_WEIGHT_CLASSES))this._removeFlag(this.FLAG_WEIGHT_CLASSES),this._attempts.push({articleContent:_,textLength:D});else if(this._flagIsActive(this.FLAG_CLEAN_CONDITIONALLY))this._removeFlag(this.FLAG_CLEAN_CONDITIONALLY),this._attempts.push({articleContent:_,textLength:D});else{if(this._attempts.push({articleContent:_,textLength:D}),this._attempts.sort(function(A,T){return T.textLength-A.textLength}),!this._attempts[0].textLength)return null;_=this._attempts[0].articleContent,W=!0}if(W){var 
tt=[g,o].concat(this._getNodeAncestors(g));return this._someNode(tt,function(A){if(!A.tagName)return!1;var T=A.getAttribute("dir");return T?(this._articleDir=T,!0):!1}),_}}},_isValidByline:function(t){return typeof t=="string"||t instanceof String?(t=t.trim(),t.length>0&&t.length<100):!1},_unescapeHtmlEntities:function(t){if(!t)return t;var e=this.HTML_ESCAPE_MAP;return t.replace(/&(quot|amp|apos|lt|gt);/g,function(i,r){return e[r]}).replace(/&#(?:x([0-9a-z]{1,4})|([0-9]{1,4}));/gi,function(i,r,l){var a=parseInt(r||l,r?16:10);return String.fromCharCode(a)})},_getJSONLD:function(t){var e=this._getAllNodesWithTag(t,["script"]),i;return this._forEachNode(e,function(r){if(!i&&r.getAttribute("type")==="application/ld+json")try{var l=r.textContent.replace(/^\\s*<!\\[CDATA\\[|\\]\\]>\\s*$/g,""),a=JSON.parse(l);if(!a["@context"]||!a["@context"].match(/^https?\\:\\/\\/schema\\.org$/)||(!a["@type"]&&Array.isArray(a["@graph"])&&(a=a["@graph"].find(function(n){return(n["@type"]||"").match(this.REGEXPS.jsonLdArticleTypes)})),!a||!a["@type"]||!a["@type"].match(this.REGEXPS.jsonLdArticleTypes)))return;if(i={},typeof a.name=="string"&&typeof a.headline=="string"&&a.name!==a.headline){var s=this._getArticleTitle(),h=this._textSimilarity(a.name,s)>.75,c=this._textSimilarity(a.headline,s)>.75;c&&!h?i.title=a.headline:i.title=a.name}else typeof a.name=="string"?i.title=a.name.trim():typeof a.headline=="string"&&(i.title=a.headline.trim());a.author&&(typeof a.author.name=="string"?i.byline=a.author.name.trim():Array.isArray(a.author)&&a.author[0]&&typeof a.author[0].name=="string"&&(i.byline=a.author.filter(function(n){return n&&typeof n.name=="string"}).map(function(n){return n.name.trim()}).join(", "))),typeof a.description=="string"&&(i.excerpt=a.description.trim()),a.publisher&&typeof a.publisher.name=="string"&&(i.siteName=a.publisher.name.trim()),typeof 
a.datePublished=="string"&&(i.datePublished=a.datePublished.trim());return}catch(n){this.log(n.message)}}),i||{}},_getArticleMetadata:function(t){var e={},i={},r=this._doc.getElementsByTagName("meta"),l=/\\s*(article|dc|dcterm|og|twitter)\\s*:\\s*(author|creator|description|published_time|title|site_name)\\s*/gi,a=/^\\s*(?:(dc|dcterm|og|twitter|weibo:(article|webpage))\\s*[\\.:]\\s*)?(author|creator|description|title|site_name)\\s*$/i;return this._forEachNode(r,function(s){var h=s.getAttribute("name"),c=s.getAttribute("property"),n=s.getAttribute("content");if(n){var u=null,m=null;c&&(u=c.match(l),u&&(m=u[0].toLowerCase().replace(/\\s/g,""),i[m]=n.trim())),!u&&h&&a.test(h)&&(m=h,n&&(m=m.toLowerCase().replace(/\\s/g,"").replace(/\\./g,":"),i[m]=n.trim()))}}),e.title=t.title||i["dc:title"]||i["dcterm:title"]||i["og:title"]||i["weibo:article:title"]||i["weibo:webpage:title"]||i.title||i["twitter:title"],e.title||(e.title=this._getArticleTitle()),e.byline=t.byline||i["dc:creator"]||i["dcterm:creator"]||i.author,e.excerpt=t.excerpt||i["dc:description"]||i["dcterm:description"]||i["og:description"]||i["weibo:article:description"]||i["weibo:webpage:description"]||i.description||i["twitter:description"],e.siteName=t.siteName||i["og:site_name"],e.publishedTime=t.datePublished||i["article:published_time"]||null,e.title=this._unescapeHtmlEntities(e.title),e.byline=this._unescapeHtmlEntities(e.byline),e.excerpt=this._unescapeHtmlEntities(e.excerpt),e.siteName=this._unescapeHtmlEntities(e.siteName),e.publishedTime=this._unescapeHtmlEntities(e.publishedTime),e},_isSingleImage:function(t){return t.tagName==="IMG"?!0:t.children.length!==1||t.textContent.trim()!==""?!1:this._isSingleImage(t.children[0])},_unwrapNoscriptImages:function(t){var e=Array.from(t.getElementsByTagName("img"));this._forEachNode(e,function(r){for(var l=0;l<r.attributes.length;l++){var 
a=r.attributes[l];switch(a.name){case"src":case"srcset":case"data-src":case"data-srcset":return}if(/\\.(jpg|jpeg|png|webp)/i.test(a.value))return}r.parentNode.removeChild(r)});var i=Array.from(t.getElementsByTagName("noscript"));this._forEachNode(i,function(r){var l=t.createElement("div");if(l.innerHTML=r.innerHTML,!!this._isSingleImage(l)){var a=r.previousElementSibling;if(a&&this._isSingleImage(a)){var s=a;s.tagName!=="IMG"&&(s=a.getElementsByTagName("img")[0]);for(var h=l.getElementsByTagName("img")[0],c=0;c<s.attributes.length;c++){var n=s.attributes[c];if(n.value!==""&&(n.name==="src"||n.name==="srcset"||/\\.(jpg|jpeg|png|webp)/i.test(n.value))){if(h.getAttribute(n.name)===n.value)continue;var u=n.name;h.hasAttribute(u)&&(u="data-old-"+u),h.setAttribute(u,n.value)}}r.parentNode.replaceChild(l.firstElementChild,a)}}})},_removeScripts:function(t){this._removeNodes(this._getAllNodesWithTag(t,["script","noscript"]))},_hasSingleTagInsideElement:function(t,e){return t.children.length!=1||t.children[0].tagName!==e?!1:!this._someNode(t.childNodes,function(i){return i.nodeType===this.TEXT_NODE&&this.REGEXPS.hasContent.test(i.textContent)})},_isElementWithoutContent:function(t){return t.nodeType===this.ELEMENT_NODE&&t.textContent.trim().length==0&&(t.children.length==0||t.children.length==t.getElementsByTagName("br").length+t.getElementsByTagName("hr").length)},_hasChildBlockElement:function(t){return this._someNode(t.childNodes,function(e){return this.DIV_TO_P_ELEMS.has(e.tagName)||this._hasChildBlockElement(e)})},_isPhrasingContent:function(t){return t.nodeType===this.TEXT_NODE||this.PHRASING_ELEMS.indexOf(t.tagName)!==-1||(t.tagName==="A"||t.tagName==="DEL"||t.tagName==="INS")&&this._everyNode(t.childNodes,this._isPhrasingContent)},_isWhitespace:function(t){return t.nodeType===this.TEXT_NODE&&t.textContent.trim().length===0||t.nodeType===this.ELEMENT_NODE&&t.tagName==="BR"},_getInnerText:function(t,e){e=typeof e=="undefined"?!0:e;var i=t.textContent.trim();return 
e?i.replace(this.REGEXPS.normalize," "):i},_getCharCount:function(t,e){return e=e||",",this._getInnerText(t).split(e).length-1},_cleanStyles:function(t){if(!(!t||t.tagName.toLowerCase()==="svg")){for(var e=0;e<this.PRESENTATIONAL_ATTRIBUTES.length;e++)t.removeAttribute(this.PRESENTATIONAL_ATTRIBUTES[e]);this.DEPRECATED_SIZE_ATTRIBUTE_ELEMS.indexOf(t.tagName)!==-1&&(t.removeAttribute("width"),t.removeAttribute("height"));for(var i=t.firstElementChild;i!==null;)this._cleanStyles(i),i=i.nextElementSibling}},_getLinkDensity:function(t){var e=this._getInnerText(t).length;if(e===0)return 0;var i=0;return this._forEachNode(t.getElementsByTagName("a"),function(r){var l=r.getAttribute("href"),a=l&&this.REGEXPS.hashUrl.test(l)?.3:1;i+=this._getInnerText(r).length*a}),i/e},_getClassWeight:function(t){if(!this._flagIsActive(this.FLAG_WEIGHT_CLASSES))return 0;var e=0;return typeof t.className=="string"&&t.className!==""&&(this.REGEXPS.negative.test(t.className)&&(e-=25),this.REGEXPS.positive.test(t.className)&&(e+=25)),typeof t.id=="string"&&t.id!==""&&(this.REGEXPS.negative.test(t.id)&&(e-=25),this.REGEXPS.positive.test(t.id)&&(e+=25)),e},_clean:function(t,e){var i=["object","embed","iframe"].indexOf(e)!==-1;this._removeNodes(this._getAllNodesWithTag(t,[e]),function(r){if(i){for(var l=0;l<r.attributes.length;l++)if(this._allowedVideoRegex.test(r.attributes[l].value))return!1;if(r.tagName==="object"&&this._allowedVideoRegex.test(r.innerHTML))return!1}return!0})},_hasAncestorTag:function(t,e,i,r){i=i||3,e=e.toUpperCase();for(var l=0;t.parentNode;){if(i>0&&l>i)return!1;if(t.parentNode.tagName===e&&(!r||r(t.parentNode)))return!0;t=t.parentNode,l++}return!1},_getRowAndColumnCount:function(t){for(var e=0,i=0,r=t.getElementsByTagName("tr"),l=0;l<r.length;l++){var a=r[l].getAttribute("rowspan")||0;a&&(a=parseInt(a,10)),e+=a||1;for(var s=0,h=r[l].getElementsByTagName("td"),c=0;c<h.length;c++){var 
n=h[c].getAttribute("colspan")||0;n&&(n=parseInt(n,10)),s+=n||1}i=Math.max(i,s)}return{rows:e,columns:i}},_markDataTables:function(t){for(var e=t.getElementsByTagName("table"),i=0;i<e.length;i++){var r=e[i],l=r.getAttribute("role");if(l=="presentation"){r._readabilityDataTable=!1;continue}var a=r.getAttribute("datatable");if(a=="0"){r._readabilityDataTable=!1;continue}var s=r.getAttribute("summary");if(s){r._readabilityDataTable=!0;continue}var h=r.getElementsByTagName("caption")[0];if(h&&h.childNodes.length>0){r._readabilityDataTable=!0;continue}var c=["col","colgroup","tfoot","thead","th"],n=function(m){return!!r.getElementsByTagName(m)[0]};if(c.some(n)){this.log("Data table because found data-y descendant"),r._readabilityDataTable=!0;continue}if(r.getElementsByTagName("table")[0]){r._readabilityDataTable=!1;continue}var u=this._getRowAndColumnCount(r);if(u.rows>=10||u.columns>4){r._readabilityDataTable=!0;continue}r._readabilityDataTable=u.rows*u.columns>10}},_fixLazyImages:function(t){this._forEachNode(this._getAllNodesWithTag(t,["img","picture","figure"]),function(e){if(e.src&&this.REGEXPS.b64DataUrl.test(e.src)){var i=this.REGEXPS.b64DataUrl.exec(e.src);if(i[1]==="image/svg+xml")return;for(var r=!1,l=0;l<e.attributes.length;l++){var a=e.attributes[l];if(a.name!=="src"&&/\\.(jpg|jpeg|png|webp)/i.test(a.value)){r=!0;break}}if(r){var s=e.src.search(/base64\\s*/i)+7,h=e.src.length-s;h<133&&e.removeAttribute("src")}}if(!((e.src||e.srcset&&e.srcset!="null")&&e.className.toLowerCase().indexOf("lazy")===-1)){for(var c=0;c<e.attributes.length;c++)if(a=e.attributes[c],!(a.name==="src"||a.name==="srcset"||a.name==="alt")){var n=null;if(/\\.(jpg|jpeg|png|webp)\\s+\\d/.test(a.value)?n="srcset":/^\\s*\\S+\\.(jpg|jpeg|png|webp)\\S*\\s*$/.test(a.value)&&(n="src"),n){if(e.tagName==="IMG"||e.tagName==="PICTURE")e.setAttribute(n,a.value);else if(e.tagName==="FIGURE"&&!this._getAllNodesWithTag(e,["img","picture"]).length){var 
u=this._doc.createElement("img");u.setAttribute(n,a.value),e.appendChild(u)}}}}})},_getTextDensity:function(t,e){var i=this._getInnerText(t,!0).length;if(i===0)return 0;var r=0,l=this._getAllNodesWithTag(t,e);return this._forEachNode(l,a=>r+=this._getInnerText(a,!0).length),r/i},_cleanConditionally:function(t,e){this._flagIsActive(this.FLAG_CLEAN_CONDITIONALLY)&&this._removeNodes(this._getAllNodesWithTag(t,[e]),function(i){var r=function(g){return g._readabilityDataTable},l=e==="ul"||e==="ol";if(!l){var a=0,s=this._getAllNodesWithTag(i,["ul","ol"]);this._forEachNode(s,g=>a+=this._getInnerText(g).length),l=a/this._getInnerText(i).length>.9}if(e==="table"&&r(i)||this._hasAncestorTag(i,"table",-1,r)||this._hasAncestorTag(i,"code"))return!1;var h=this._getClassWeight(i);this.log("Cleaning Conditionally",i);var c=0;if(h+c<0)return!0;if(this._getCharCount(i,",")<10){for(var n=i.getElementsByTagName("p").length,u=i.getElementsByTagName("img").length,m=i.getElementsByTagName("li").length-100,b=i.getElementsByTagName("input").length,N=this._getTextDensity(i,["h1","h2","h3","h4","h5","h6"]),v=0,y=this._getAllNodesWithTag(i,["object","embed","iframe"]),E=0;E<y.length;E++){for(var d=0;d<y[E].attributes.length;d++)if(this._allowedVideoRegex.test(y[E].attributes[d].value))return!1;if(y[E].tagName==="object"&&this._allowedVideoRegex.test(y[E].innerHTML))return!1;v++}var p=this._getLinkDensity(i),x=this._getInnerText(i).length,o=u>1&&n/u<.5&&!this._hasAncestorTag(i,"figure")||!l&&m>n||b>Math.floor(n/3)||!l&&N<.9&&x<25&&(u===0||u>2)&&!this._hasAncestorTag(i,"figure")||!l&&h<25&&p>.2||h>=25&&p>.5||v===1&&x<75||v>1;if(l&&o){for(var L=0;L<i.children.length;L++)if(i.children[L].children.length>1)return o;let g=i.getElementsByTagName("li").length;if(u==g)return!1}return o}return!1})},_cleanMatchedNodes:function(t,e){for(var i=this._getNextNode(t,!0),r=this._getNextNode(t);r&&r!=i;)e.call(this,r,r.className+" 
"+r.id)?r=this._removeAndGetNext(r):r=this._getNextNode(r)},_cleanHeaders:function(t){let e=this._getAllNodesWithTag(t,["h1","h2"]);this._removeNodes(e,function(i){let r=this._getClassWeight(i)<0;return r&&this.log("Removing header with low class weight:",i),r})},_headerDuplicatesTitle:function(t){if(t.tagName!="H1"&&t.tagName!="H2")return!1;var e=this._getInnerText(t,!1);return this.log("Evaluating similarity of header:",e,this._articleTitle),this._textSimilarity(this._articleTitle,e)>.75},_flagIsActive:function(t){return(this._flags&t)>0},_removeFlag:function(t){this._flags=this._flags&~t},_isProbablyVisible:function(t){return(!t.style||t.style.display!="none")&&(!t.style||t.style.visibility!="hidden")&&!t.hasAttribute("hidden")&&(!t.hasAttribute("aria-hidden")||t.getAttribute("aria-hidden")!="true"||t.className&&t.className.indexOf&&t.className.indexOf("fallback-image")!==-1)},parse:function(){if(this._maxElemsToParse>0){var t=this._doc.getElementsByTagName("*").length;if(t>this._maxElemsToParse)throw new Error("Aborting parsing document; "+t+" elements found")}this._unwrapNoscriptImages(this._doc);var e=this._disableJSONLD?{}:this._getJSONLD(this._doc);this._removeScripts(this._doc),this._prepDocument();var i=this._getArticleMetadata(e);this._articleTitle=i.title;var r=this._grabArticle();if(!r)return null;if(this.log("Grabbed: "+r.innerHTML),this._postProcessContent(r),!i.excerpt){var l=r.getElementsByTagName("p");l.length>0&&(i.excerpt=l[0].textContent.trim())}var a=r.textContent;return{title:this._articleTitle,byline:i.byline||this._articleByline,dir:this._articleDir,lang:this._articleLang,content:this._serializer(r),textContent:a,length:a.length,excerpt:i.excerpt,siteName:i.siteName||this._articleSiteName,publishedTime:i.publishedTime}}};typeof module=="object"&&(module.exports=q);\n';
622
+
623
+ // src/libs/browser-search/search.ts
624
+ import { defaultLogger as defaultLogger3 } from "@agent-infra/logger";
625
+
626
+ // src/libs/browser-search/utils.ts
627
+ import Turndown from "turndown";
628
+ import { gfm } from "turndown-plugin-gfm";
629
+ import { defaultLogger as logger } from "@agent-infra/logger";
630
+ import UserAgent from "user-agents";
631
/**
 * Parses a string into a `URL` object without throwing.
 *
 * @param {string} url2 - Candidate URL string.
 * @returns {URL | null} The parsed URL, or `null` when `new URL()` rejects the input.
 */
var parseUrl = function (url2) {
  let parsed = null;
  try {
    parsed = new URL(url2);
  } catch {
    // Unparseable input is an expected case: report it as null instead of throwing.
  }
  return parsed;
};
638
/**
 * Decides whether a URL should be skipped.
 *
 * A URL is skipped when it cannot be parsed, or when its hostname is exactly
 * one of the listed hosts (exact match only; other subdomains are not covered).
 *
 * @param {string} url2 - URL to check.
 * @returns {boolean} `true` when the URL must be skipped.
 */
var shouldSkipDomain = (url2) => {
  const skippedHosts = new Set([
    "reddit.com",
    "www.reddit.com",
    "x.com",
    "twitter.com",
    "www.twitter.com",
    "youtube.com",
    "www.youtube.com"
  ]);
  const hostname = parseUrl(url2)?.hostname;
  // `parseUrl` yields null for invalid input, so no hostname means "skip".
  if (hostname === undefined) {
    return true;
  }
  return skippedHosts.has(hostname);
};
652
/**
 * Applies basic anti-bot-detection tweaks to a Puppeteer page.
 *
 * Sets a random desktop user agent, bypasses CSP, and overrides a few
 * `navigator` properties (`webdriver`, `languages`, `plugins`, `headless`)
 * plus `navigator.permissions.query` for the "notifications" permission.
 *
 * The overrides are registered with `evaluateOnNewDocument` (when available)
 * so they are re-applied to every document the page loads, and also run once
 * against the current document. Running them only via `page.evaluate` would
 * lose them on the next navigation.
 *
 * @param page - The Puppeteer page object to patch.
 * @returns {Promise<void>} Resolves once the page has been configured.
 */
async function applyStealthScripts(page) {
  const userAgent = new UserAgent({
    deviceCategory: "desktop"
  }).toString();
  await page.setBypassCSP(true);
  await page.setUserAgent(userAgent);

  // This function is serialized and executed inside the page, so it must not
  // reference anything from the surrounding Node.js scope.
  const patchNavigator = () => {
    // `configurable: true` lets the patch be applied again to the same
    // document without throwing "Cannot redefine property".
    Object.defineProperty(navigator, "webdriver", {
      get: () => void 0,
      configurable: true
    });
    Object.defineProperty(navigator, "languages", {
      get: () => ["en-US", "en"],
      configurable: true
    });
    Object.defineProperty(navigator, "plugins", {
      get: () => [{}, {}, {}, {}, {}],
      configurable: true
    });
    Object.defineProperty(navigator, "headless", {
      get: () => false,
      configurable: true
    });
    const permissions = window.navigator.permissions;
    if (permissions && typeof permissions.query === "function") {
      // Keep the receiver: calling the detached native method throws
      // "Illegal invocation" in browsers.
      const originalQuery = permissions.query.bind(permissions);
      permissions.query = (parameters) => parameters.name === "notifications" ? Promise.resolve({
        state: Notification.permission
      }) : originalQuery(parameters);
    }
  };

  if (typeof page.evaluateOnNewDocument === "function") {
    await page.evaluateOnNewDocument(patchNavigator);
  }
  await page.evaluate(patchNavigator);
}
677
/**
 * Prepares a page so that only document navigation requests are loaded.
 *
 * Applies the stealth scripts, enables request interception, and registers a
 * "request" handler that continues requests whose resource type is
 * "document" and that are navigation requests, aborting everything else.
 *
 * @param page - The Puppeteer page object to configure.
 * @returns {Promise<void>} Resolves once the handler is registered.
 */
async function interceptRequest(page) {
  await applyStealthScripts(page);
  await page.setRequestInterception(true);

  const handleRequest = (request) => {
    const isDocument = request.resourceType() === "document";
    // `isNavigationRequest()` is only consulted for document requests.
    const isAllowed = isDocument && request.isNavigationRequest();
    return isAllowed ? request.continue() : request.abort();
  };

  page.on("request", handleRequest);
}
691
/**
 * Extracts the readable content and title of a loaded page.
 *
 * The Readability source is evaluated as a CommonJS-style module (a `module`
 * object is passed in and its `exports` is used as the constructor). Noise
 * elements are removed from the document before Readability parses it.
 *
 * NOTE(review): this looks like it is meant to be executed inside the browser
 * page, so it is kept free of references to outer-scope helpers; confirm
 * against the caller.
 *
 * @param window2 - Window object whose `document` is processed (mutated in place).
 * @param {string} readabilityScript - Source text of the Readability library.
 * @returns {{content: string, title: string}} Article HTML (empty string when
 *   none was found) and the article title, falling back to `document.title`.
 */
function extractPageInformation(window2, readabilityScript) {
  const loadReadability = new Function(
    "module",
    `${readabilityScript}
return module.exports`
  );
  const Readability = loadReadability({});
  const document2 = window2.document;

  const noiseSelector = "script,noscript,style,link,svg,img,video,iframe,canvas,.reflist";
  for (const node of document2.querySelectorAll(noiseSelector)) {
    node.remove();
  }

  const article = new Readability(document2).parse();
  const fallbackTitle = document2.title;
  return {
    content: article?.content || "",
    title: article?.title || fallbackTitle
  };
}
709
/**
 * Converts an HTML string to Markdown with Turndown.
 *
 * @param {string} html - HTML to convert; falsy input yields an empty string.
 * @param {object} [options] - Conversion options.
 * @param {string} [options.codeBlockStyle="fenced"] - Passed to Turndown.
 * @param {string} [options.headingStyle="atx"] - Passed to Turndown.
 * @param {string} [options.emDelimiter="*"] - Passed to Turndown.
 * @param {string} [options.strongDelimiter="**"] - Passed to Turndown.
 * @param {boolean} [options.gfmExtension=true] - Whether to enable the GFM plugin.
 * @returns {string} The Markdown text, or the original HTML when conversion fails
 *   (the failure is logged, not thrown).
 */
function toMarkdown(html, options = {}) {
  if (!html) {
    return "";
  }

  let markdown;
  try {
    // Destructuring happens inside the try so that invalid `options`
    // (e.g. null) falls back to returning the raw HTML as well.
    const {
      codeBlockStyle = "fenced",
      headingStyle = "atx",
      emDelimiter = "*",
      strongDelimiter = "**",
      gfmExtension = true
    } = options;
    const service = new Turndown({
      codeBlockStyle,
      headingStyle,
      emDelimiter,
      strongDelimiter
    });
    if (gfmExtension) {
      service.use(gfm);
    }
    markdown = service.turndown(html);
  } catch (error) {
    logger.error("Error converting HTML to Markdown:", error);
    markdown = html;
  }
  return markdown;
}
734
+
735
+ // src/libs/browser-search/queue.ts
736
/**
 * Runs asynchronous tasks with a bounded number running at the same time.
 *
 * Tasks are started in the order they were added. Values of fulfilled tasks
 * are collected in `results` in completion order; rejected tasks contribute
 * nothing to `results` and are reported only through the promise returned by
 * `add()`.
 */
var PromiseQueue = class {
  /** Task wrappers that have not been started yet. */
  queue = [];
  /** Maximum number of tasks allowed to run at once. */
  concurrency;
  /** Number of tasks currently running. */
  running = 0;
  /** Values of the tasks that fulfilled, in completion order. */
  results = [];
  /** Resolvers of pending `waitAll()` calls, woken when the queue goes idle. */
  #idleWaiters = [];

  /**
   * @param {number} [concurrency=1] - Maximum number of concurrently running tasks.
   */
  constructor(concurrency = 1) {
    this.concurrency = concurrency;
  }

  /**
   * Enqueues a task and starts it as soon as a slot is free.
   *
   * @param {() => any} task - Function producing a value or a promise.
   * @returns {Promise<any>} Settles with the outcome of `task`.
   */
  add(task) {
    return new Promise((resolve, reject) => {
      this.queue.push(async () => {
        try {
          const result = await task();
          resolve(result);
          return result;
        } catch (error) {
          reject(error);
          throw error;
        }
      });
      void this.run();
    });
  }

  /**
   * Starts the next queued task if the concurrency limit allows it.
   * Never rejects: task failures are delivered through `add()`.
   *
   * @returns {Promise<void>}
   */
  async run() {
    if (this.running >= this.concurrency || this.queue.length === 0) {
      return;
    }
    this.running++;
    const task = this.queue.shift();
    try {
      const result = await task();
      this.results.push(result);
    } catch {
      // Intentionally ignored here: the wrapper built in add() has already
      // rejected the caller's promise with this error.
    } finally {
      this.running--;
      // run() starts the next task synchronously (up to its first await), so
      // the idle check below sees the updated `running` / `queue` state.
      void this.run();
      this.#notifyIfIdle();
    }
  }

  /**
   * Waits until no task is running or queued.
   *
   * Pending callers are woken by the queue itself when it becomes idle, so no
   * timer polling is involved.
   *
   * @returns {Promise<any[]>} The `results` array.
   */
  async waitAll() {
    while (this.running > 0 || this.queue.length > 0) {
      await new Promise((resolve) => {
        this.#idleWaiters.push(resolve);
      });
    }
    return this.results;
  }

  /** Wakes every pending `waitAll()` call once nothing is running or queued. */
  #notifyIfIdle() {
    if (this.running > 0 || this.queue.length > 0) {
      return;
    }
    const waiters = this.#idleWaiters;
    this.#idleWaiters = [];
    for (const wake of waiters) {
      wake();
    }
  }
};
781
+
782
+ // src/libs/browser-search/engines/bing.ts
783
/** Search-engine adapter that builds Bing queries and scrapes Bing result pages. */
var BingSearchEngine = class {
  /**
   * Builds the Bing search URL for a query.
   *
   * @param query - The search query string
   * @param options - Search configuration options
   * @param options.count - Number of search results to request (default: 10)
   * @param options.excludeDomains - Domains to exclude; each is prepended to
   *   the query as a `-site:` term
   * @returns The Bing search URL as a string
   */
  getSearchUrl(query, options) {
    const excluded = options.excludeDomains;
    const exclusionPrefix = excluded && excluded.length > 0
      ? `${excluded.map((domain) => `-site:${domain}`).join(" ")} `
      : "";
    const searchParams = new URLSearchParams([
      ["q", `${exclusionPrefix}${query}`],
      ["count", `${options.count || 10}`]
    ]);
    return `https://www.bing.com/search?${searchParams.toString()}`;
  }
  /**
   * Collects the organic results (`.b_algo` elements) of a Bing result page.
   *
   * Results without a title or without a valid absolute URL are left out.
   * Errors are logged with `console.error` and rethrown.
   *
   * NOTE(review): presumably executed inside the browser page, so everything
   * it needs is defined locally; confirm against the caller.
   *
   * @param window - The browser window object containing the loaded Bing search page
   * @returns Array of `{ title, snippet, url, content }` items
   */
  extractSearchResults(window2) {
    const document2 = window2.document;
    const links = [];

    const isValidUrl = (url2) => {
      try {
        new URL(url2);
      } catch (error) {
        return false;
      }
      return true;
    };

    // Builds the snippet from a copy of the result element so the page itself
    // is left untouched.
    const extractSnippet = (element) => {
      const clone = element.cloneNode(true);
      // Drop the title, the attribution line and non-content elements.
      for (const selector of ["h2", ".b_attribution", "script, style"]) {
        clone.querySelectorAll(selector).forEach((el) => el.remove());
      }
      // Keep a text fragment only if it neither contains nor is contained in
      // a fragment that was already kept.
      const fragments = [];
      for (const node of Array.from(clone.querySelectorAll("*"))) {
        const fragment = node.textContent?.trim();
        if (!fragment) {
          continue;
        }
        const isRedundant = fragments.some(
          (kept) => kept.includes(fragment) || fragment.includes(kept)
        );
        if (!isRedundant) {
          fragments.push(fragment);
        }
      }
      return fragments.join(" ").trim().replace(/\s+/g, " ");
    };

    try {
      for (const element of document2.querySelectorAll(".b_algo")) {
        const titleEl = element.querySelector("h2");
        const href = element.querySelector("h2 a")?.getAttribute("href");
        const snippet = extractSnippet(element);
        if (!href || !isValidUrl(href)) {
          continue;
        }
        const title = titleEl?.textContent || "";
        if (!title) {
          continue;
        }
        links.push({
          title,
          snippet,
          url: href,
          content: ""
        });
      }
    } catch (error) {
      console.error("Error extracting search results from Bing:", error);
      throw error;
    }
    return links;
  }
  /**
   * Waits until the Bing results container (`#b_results`) is present.
   *
   * @param page - The Puppeteer page object
   * @param timeout - Maximum wait in milliseconds (default: 10000)
   * @returns Promise that resolves when the container has appeared
   */
  async waitForSearchResults(page, timeout) {
    const waitOptions = { timeout: timeout ?? 1e4 };
    await page.waitForSelector("#b_results", waitOptions);
  }
};
870
+
871
+ // src/libs/browser-search/engines/baidu.ts
872
/** Search-engine adapter that builds Baidu queries and scrapes Baidu result pages. */
var BaiduSearchEngine = class {
  /**
   * Builds the Baidu search URL for a query.
   *
   * @param query - The search query string
   * @param options - Search configuration options
   * @param options.count - Number of search results to request (default: 10),
   *   sent as the `rn` parameter
   * @param options.excludeDomains - Domains to exclude; each is prepended to
   *   the query as a `-site:` term
   * @returns The Baidu search URL as a string
   */
  getSearchUrl(query, options) {
    const excluded = options.excludeDomains;
    const exclusions = excluded && excluded.length > 0
      ? excluded.map((domain) => `-site:${domain}`).join(" ")
      : "";
    const wd = exclusions ? `${exclusions} ${query}` : query;
    const searchParams = new URLSearchParams([
      ["wd", wd],
      // rn is the parameter for result count
      ["rn", `${options.count || 10}`]
    ]);
    return `https://www.baidu.com/s?${searchParams.toString()}`;
  }
  /**
   * Collects the results (`.result` elements) of a Baidu result page.
   *
   * Results without a link or without a title are left out. Unlike the other
   * engines in this file, errors are only logged with `console.error`; the
   * items gathered so far are still returned.
   *
   * @param window - The browser window object containing the loaded Baidu search page
   * @returns Array of `{ title, url, snippet, content }` items
   */
  extractSearchResults(window2) {
    const document2 = window2.document;
    const links = [];
    try {
      for (const element of document2.querySelectorAll(".result")) {
        const titleEl = element.querySelector(".t a");
        const href = titleEl?.getAttribute("href");
        const snippetEl = element.querySelector(".c-span-last .content-right_2s-H4");
        if (!href) {
          continue;
        }
        const title = titleEl?.textContent || "";
        if (!title) {
          continue;
        }
        links.push({
          title,
          // Note: Baidu uses redirects, we'll need to follow them
          url: href,
          snippet: snippetEl?.textContent || "",
          content: ""
        });
      }
    } catch (error) {
      console.error("Error extracting search results from Baidu:", error);
    }
    return links;
  }
  /**
   * Waits until the Baidu pagination container (`#page`) is present.
   *
   * @param page - The Puppeteer page object
   * @param timeout - Maximum wait in milliseconds (default: 10000)
   * @returns Promise that resolves when the container has appeared
   */
  async waitForSearchResults(page, timeout) {
    const waitOptions = { timeout: timeout ?? 1e4 };
    await page.waitForSelector("#page", waitOptions);
  }
};
934
+
935
+ // src/libs/browser-search/engines/sogou.ts
936
/** Search-engine adapter that builds Sogou queries and scrapes Sogou result pages. */
var SogouSearchEngine = class {
  /**
   * Generates a Sogou search URL based on the provided query and options.
   *
   * @param query - The search query string
   * @param options - Search configuration options (may be omitted)
   * @param options.count - Number of search results to request (default: 10)
   * @param options.excludeDomains - Array of domain names to exclude from search results
   * @returns Formatted Sogou search URL as a string
   */
  getSearchUrl(query, options = {}) {
    const { count = 10, excludeDomains = [] } = options;
    const excludeDomainsQuery = excludeDomains && excludeDomains.length > 0 ? excludeDomains.map((domain) => `-site:${domain}`).join(" ") : "";
    const searchParams = new URLSearchParams({
      query: `${excludeDomainsQuery ? `${excludeDomainsQuery} ` : ""}${query}`,
      num: `${count}`
    });
    return `https://www.sogou.com/web?${searchParams.toString()}`;
  }
  /**
   * !NOTE: This function runs in the context of the browser page, not Node.js,
   * so it must stay self-contained (no references to outer-scope helpers).
   *
   * Extracts search results from a Sogou result page. Results without a
   * title, without a link, or whose link does not resolve to an http(s) URL
   * are left out. Relative links are resolved against the Sogou origin.
   * Elements matching the excluded snippet selectors are removed from the
   * page's DOM before snippets are read.
   *
   * @param window - The window object
   * @returns Array of `{ title, url, snippet, content }` items
   * @throws Rethrows any error raised while reading the DOM, after logging it
   */
  extractSearchResults(window2) {
    const links = [];
    const document2 = window2.document;
    const EndPoints = "https://www.sogou.com";
    const SELECTOR = {
      results: ".results .vrwrap",
      resultTitle: ".vr-title",
      resultLink: ".vr-title > a",
      resultSnippet: [".star-wiki", ".fz-mid", ".attribute-centent"],
      resultSnippetExcluded: [".text-lightgray", ".zan-box", ".tag-website"],
      related: "#main .vrwrap.middle-better-hintBox .hint-mid"
    };
    // Turns an href attribute into an absolute http(s) URL, or null when the
    // link is missing or unusable. Absolute links are returned unchanged;
    // everything else is resolved against the Sogou origin.
    const resolveUrl = (href) => {
      const raw = href?.trim();
      if (!raw) return null;
      try {
        if (/^https?:\/\//i.test(raw)) {
          new URL(raw);
          return raw;
        }
        const resolved = new URL(raw, EndPoints);
        // Rejects javascript:, mailto: and similar non-web targets.
        return resolved.protocol === "http:" || resolved.protocol === "https:" ? resolved.href : null;
      } catch {
        return null;
      }
    };
    try {
      const elements = document2.querySelectorAll(SELECTOR.results);
      elements.forEach((element) => {
        const url2 = resolveUrl(element.querySelector(SELECTOR.resultLink)?.getAttribute("href"));
        if (!url2) return;
        const title = element.querySelector(SELECTOR.resultTitle)?.textContent?.trim() || "";
        if (!title) return;
        // Strip decorative nodes once, before any snippet text is read.
        SELECTOR.resultSnippetExcluded.forEach((excludedSelector) => {
          element.querySelectorAll(excludedSelector).forEach((el) => el.remove());
        });
        const snippets = SELECTOR.resultSnippet.map(
          (selector) => element.querySelector(selector)?.textContent?.trim() || ""
        );
        links.push({
          title,
          url: url2,
          snippet: snippets.join(""),
          content: ""
        });
      });
    } catch (error) {
      const msg = error instanceof Error ? error.message : String(error);
      console.error("Error extracting search results from Sogou:", msg);
      throw error;
    }
    return links;
  }
  /**
   * Waits until the Sogou pagination container (`#pagebar_container`) is present.
   *
   * @param page - The Puppeteer page object
   * @param timeout - Maximum wait in milliseconds (default: 10000)
   * @returns Promise that resolves when the container has appeared
   */
  async waitForSearchResults(page, timeout) {
    await page.waitForSelector("#pagebar_container", {
      timeout: timeout ?? 1e4
    });
  }
};
1025
+
1026
+ // src/libs/browser-search/engines/google.ts
1027
var GoogleSearchEngine = class {
  /**
   * Generates a Google search URL based on the provided query and options.
   *
   * Excluded domains are expressed as `-site:<domain>` operators placed in
   * front of the query text.
   *
   * @param query - The search query string
   * @param options - Search configuration options (may be omitted)
   * @param options.count - Number of search results to request (default: 10)
   * @param options.excludeDomains - Array of domain names to exclude from search results
   * @returns Formatted Google search URL as a string
   */
  getSearchUrl(query, options = {}) {
    const { count, excludeDomains } = options;
    const exclusions = excludeDomains && excludeDomains.length > 0 ? `${excludeDomains.map((domain) => `-site:${domain}`).join(" ")} ` : "";
    const searchParams = new URLSearchParams({
      q: `${exclusions}${query}`,
      num: `${count || 10}`
    });
    // NOTE(review): udm=14 presumably selects Google's plain "Web" results view — confirm.
    searchParams.set("udm", "14");
    return `https://www.google.com/search?${searchParams.toString()}`;
  }
  /**
   * Extracts search results from a Google search page.
   *
   * NOTE(review): this method is handed to `browser.evaluateOnNewPage` as
   * `pageFunction`, so it is presumably serialized and run inside the page.
   * Keep it self-contained: no `this`, no references to outer-scope helpers.
   *
   * @param window - The browser window object containing the loaded Google search page
   * @returns Array of `{ title, url, snippet, content }` results; `content` is always ""
   */
  extractSearchResults(window2) {
    const links = [];
    const document2 = window2.document;
    const isValidUrl = (url2) => {
      try {
        new URL(url2);
        return true;
      } catch (error) {
        return false;
      }
    };
    // Collects the visible text of a result, minus title, cite and script/style
    // nodes. Works on a clone so the live DOM is never modified.
    const extractSnippet = (element) => {
      const clone = element.cloneNode(true);
      clone.querySelectorAll("h3").forEach((el) => el.remove());
      clone.querySelectorAll("cite").forEach((el) => el.remove());
      clone.querySelectorAll("script, style").forEach((el) => el.remove());
      const text = Array.from(clone.querySelectorAll("*")).filter((node) => node.textContent?.trim()).map((node) => node.textContent?.trim()).filter(Boolean).reduce((acc, curr) => {
        // Nested nodes repeat their ancestors' text; keep a fragment only when
        // it neither contains nor is contained in one already collected.
        if (!acc.some(
          (text2) => text2.includes(curr) || curr.includes(text2)
        )) {
          acc.push(curr);
        }
        return acc;
      }, []).join(" ").trim().replace(/\s+/g, " ");
      return text;
    };
    try {
      const elements = document2.querySelectorAll(".tF2Cxc");
      elements.forEach((element) => {
        const url2 = element.querySelector("a")?.getAttribute("href");
        // Validate first: the snippet needs a full subtree clone, which is
        // wasted work (and a needless failure point) for rejected results.
        if (!url2 || !isValidUrl(url2)) return;
        const title = element.querySelector("h3")?.textContent?.trim() || "";
        if (!title) return;
        links.push({
          title,
          url: url2,
          snippet: extractSnippet(element.parentElement || element),
          content: ""
        });
      });
    } catch (error) {
      // Best effort: report the failure and return whatever was collected.
      console.error(error);
    }
    return links;
  }
  /**
   * Waits for Google search results to load completely.
   *
   * @param page - The Puppeteer page object
   * @param timeout - Maximum wait in milliseconds; `null`/`undefined` fall back to 10000
   * @returns Promise that resolves when the `#search` element is present
   */
  async waitForSearchResults(page, timeout) {
    await page.waitForSelector("#search", {
      timeout: timeout ?? 1e4
    });
  }
};
1114
+
1115
+ // src/libs/browser-search/engines/get.ts
1116
/**
 * Creates a fresh search-engine adapter for the given engine name.
 *
 * @param engine - One of "bing", "baidu", "sogou" or "google"
 * @returns A new adapter instance; any unrecognised name yields the Bing adapter
 */
function getSearchEngine(engine) {
  if (engine === "baidu") return new BaiduSearchEngine();
  if (engine === "sogou") return new SogouSearchEngine();
  if (engine === "google") return new GoogleSearchEngine();
  // "bing" and every unknown value share the Bing implementation.
  return new BingSearchEngine();
}
1130
+
1131
+ // src/libs/browser-search/search.ts
1132
/**
 * Runs web searches through a browser instance and optionally visits each
 * result page to extract its content.
 */
var BrowserSearch = class {
  logger;
  browser;
  isBrowserOpen = false;
  defaultEngine;
  /**
   * @param config - Optional settings: `logger`, `browser`, `defaultEngine`
   *   (falls back to "bing") and `browserOptions` (forwarded to `browser.launch`)
   */
  constructor(config = {}) {
    this.config = config;
    this.logger = config?.logger ?? defaultLogger3;
    this.browser = config.browser ?? new LocalBrowser({ logger: this.logger });
    this.defaultEngine = config.defaultEngine ?? "bing";
  }
  /**
   * Search web and extract content from result pages.
   *
   * Accepts a single query or an array of queries; with several queries the
   * requested count is split between them, with a floor of 3 per query.
   * Any failure is logged and produces an empty array instead of a rejection.
   *
   * @param options - Search options (`query`, `count`, `engine`, `excludeDomains`,
   *   `concurrency`, `truncate`, `needVisitedUrls`, `keepBrowserOpen`)
   * @returns Flattened list of results across all queries
   */
  async perform(options) {
    this.logger.info("Starting search with options:", options);
    const queryList = Array.isArray(options.query) ? options.query : [options.query];
    const excludeDomains = options.excludeDomains || [];
    const count = options.count && Math.max(3, Math.floor(options.count / queryList.length));
    const engine = options.engine || this.defaultEngine;
    try {
      if (this.isBrowserOpen) {
        this.logger.info("Using existing browser instance");
      } else {
        this.logger.info("Launching browser");
        await this.browser.launch(this.config.browserOptions);
        this.isBrowserOpen = true;
      }
      // The queue and the visited-URL set are shared by every query of this call.
      const queue = new PromiseQueue(options.concurrency || 15);
      const visitedUrls = /* @__PURE__ */ new Set();
      const pending = queryList.map(
        (query) => this.search(this.browser, {
          query,
          count,
          queue,
          visitedUrls,
          excludeDomains,
          truncate: options.truncate,
          needVisitedUrls: options.needVisitedUrls,
          engine
        })
      );
      const perQuery = await Promise.all(pending);
      this.logger.success("Search completed successfully");
      return perQuery.flat();
    } catch (error) {
      this.logger.error("Search failed:", error);
      return [];
    } finally {
      const shouldClose = !options.keepBrowserOpen && this.isBrowserOpen;
      if (shouldClose) {
        await this.closeBrowser();
      }
    }
  }
  /**
   * Explicitly close the browser instance. Does nothing when it is not open.
   */
  async closeBrowser() {
    if (!this.isBrowserOpen) return;
    this.logger.info("Closing browser");
    await this.browser.close();
    this.isBrowserOpen = false;
  }
  /**
   * Runs one query against one engine and post-processes the extracted links.
   *
   * @param browser - Browser used to load the search result page
   * @param options - Per-query options assembled by `perform`
   * @returns Results, with `content` cut to `options.truncate` characters when set
   */
  async search(browser, options) {
    const searchEngine = getSearchEngine(options.engine);
    const url2 = searchEngine.getSearchUrl(options.query, {
      count: options.count,
      excludeDomains: options.excludeDomains
    });
    this.logger.info(`Searching with ${options.engine} engine: ${url2}`);
    const fetched = await browser.evaluateOnNewPage({
      url: url2,
      waitForOptions: {
        waitUntil: "networkidle0"
      },
      pageFunction: searchEngine.extractSearchResults,
      pageFunctionParams: [],
      beforePageLoad: async (page) => {
        await interceptRequest(page);
      },
      afterPageLoad: async (page) => {
        if (searchEngine.waitForSearchResults) {
          await searchEngine.waitForSearchResults(page, 1e4);
        }
      }
    });
    this.logger.info(`Fetched ${fetched?.length ?? 0} links`);
    // Drop URLs already seen in this `perform` call, then skip-listed domains.
    // A URL is recorded as visited even when its domain ends up skipped.
    const links = (fetched ?? []).filter((link) => {
      if (options.visitedUrls.has(link.url)) return false;
      options.visitedUrls.add(link.url);
      return !shouldSkipDomain(link.url);
    });
    if (links.length === 0) {
      this.logger.info("No valid links found");
      return [];
    }
    // NOTE(review): link visits go through `this.browser`, not the `browser`
    // parameter; the only caller visible here passes the same object.
    const tasks = options.needVisitedUrls ? links.map(
      (item) => options.queue.add(() => this.visitLink(this.browser, item))
    ) : links;
    const settled = await Promise.allSettled(tasks);
    const collected = [];
    for (const outcome of settled) {
      if (outcome.status === "rejected" || !outcome.value) continue;
      const entry = outcome.value;
      collected.push({
        ...entry,
        content: options.truncate ? entry.content.slice(0, options.truncate) : entry.content
      });
    }
    return collected;
  }
  /**
   * Loads a result page and converts its extracted content to Markdown.
   *
   * @param browser - Browser used to load the page
   * @param item - Search result providing `url` and `snippet`
   * @returns The page information merged with the item's `url` and `snippet`,
   *   or `undefined` when nothing was extracted or the visit failed (logged)
   */
  async visitLink(browser, item) {
    try {
      this.logger.info("Visiting link:", item.url);
      const pageInfo = await browser.evaluateOnNewPage({
        url: item.url,
        pageFunction: extractPageInformation,
        pageFunctionParams: [READABILITY_SCRIPT],
        beforePageLoad: async (page) => {
          await interceptRequest(page);
        }
      });
      if (!pageInfo) return void 0;
      const content = toMarkdown(pageInfo.content);
      return { ...pageInfo, url: item.url, content, snippet: item.snippet };
    } catch (e) {
      this.logger.error("Failed to visit link:", e);
    }
  }
};
1262
+
1263
+ // src/search/local.ts
1264
+ import { ConsoleLogger } from "@agent-infra/logger";
1265
+ var logger2 = new ConsoleLogger("[LocalSearch]");
1266
/**
 * Searches with a local headless browser, trying engines in order until one
 * of them returns results.
 *
 * @param options - Search options
 * @param options.query - The search query
 * @param options.limit - Number of results to request per engine (default: 10)
 * @param options.engines - Comma-separated engine names, or "all" (default) for
 *   "bing,google,baidu,sogou". Whitespace around names is ignored.
 * @returns `{ results, success: true }`; `results` holds the hits of the first
 *   engine that produced any, and is empty when none did
 * @throws Error when `engines` contains no engine name
 */
async function localSearch(options) {
  const { query, limit = 10, engines = "all" } = options;
  const engineSpec = engines === "all" ? "bing,google,baidu,sogou" : engines;
  // `split` never returns an empty array, so blank entries must be dropped for
  // the emptiness check to mean anything. Trimming matters too: " google" is
  // not a known engine name and would silently fall back to Bing.
  const engineList = engineSpec.split(",").map((name) => name.trim()).filter(Boolean);
  if (engineList.length === 0) {
    throw new Error("engines is required");
  }
  const browserSearch = new BrowserSearch({
    logger: logger2,
    browserOptions: {
      headless: true
    }
  });
  try {
    const results = [];
    for (const engine of engineList) {
      const res = await browserSearch.perform({
        query,
        count: limit,
        engine,
        needVisitedUrls: false
      });
      if (res.length > 0) {
        // First engine with hits wins; the remaining ones are not queried.
        results.push(...res);
        break;
      }
    }
    logger2.info(`Found ${results.length} results for ${query}`, results);
    return {
      results,
      success: true
    };
  } finally {
    await browserSearch.closeBrowser();
  }
}
126
1305
 
127
1306
  // src/tools.ts
128
1307
  var SEARCH_TOOL = {
@@ -145,16 +1324,69 @@ var SEARCH_TOOL = {
145
1324
  },
146
1325
  categories: {
147
1326
  type: "string",
1327
+ enum: [
1328
+ "general",
1329
+ "news",
1330
+ "images",
1331
+ "videos",
1332
+ "it",
1333
+ "science",
1334
+ "map",
1335
+ "music",
1336
+ "files",
1337
+ "social_media"
1338
+ ],
148
1339
  description: "Categories to search for (default: general)"
149
1340
  },
150
1341
  timeRange: {
151
1342
  type: "string",
152
- description: "Time range for search results (default: all)"
1343
+ description: "Time range for search results (default: all)",
1344
+ enum: [
1345
+ "all",
1346
+ "day",
1347
+ "week",
1348
+ "month",
1349
+ "year"
1350
+ ]
153
1351
  }
154
1352
  },
155
1353
  required: ["query"]
156
1354
  }
157
1355
  };
1356
/**
 * Tool definition for `one_map`: URL discovery from a starting URL.
 * Only `url` is required; every other property is an optional filter or limit.
 */
var MAP_TOOL = {
  name: "one_map",
  description: "Discover URLs from a starting point. Can use both sitemap.xml and HTML link discovery.",
  inputSchema: {
    type: "object",
    properties: {
      url: { type: "string", description: "Starting URL for URL discovery" },
      search: { type: "string", description: "Optional search term to filter URLs" },
      ignoreSitemap: { type: "boolean", description: "Skip sitemap.xml discovery and only use HTML links" },
      sitemapOnly: { type: "boolean", description: "Only use sitemap.xml for discovery, ignore HTML links" },
      includeSubdomains: { type: "boolean", description: "Include URLs from subdomains in results" },
      limit: { type: "number", description: "Maximum number of URLs to return" }
    },
    required: ["url"]
  }
};
158
1390
  var SCRAPE_TOOL = {
159
1391
  name: "one_scrape",
160
1392
  description: "Scrape a single webpage with advanced options for content extraction. Supports various formats including markdown, HTML, and screenshots. Can execute custom actions like clicking or scrolling before scraping.",
@@ -348,6 +1580,7 @@ var EXTRACT_TOOL = {
348
1580
  // src/index.ts
349
1581
  import FirecrawlApp from "@mendable/firecrawl-js";
350
1582
  import dotenvx from "@dotenvx/dotenvx";
1583
+ import { SafeSearchType as SafeSearchType2 } from "duck-duck-scrape";
351
1584
  dotenvx.config();
352
1585
  var SEARCH_API_URL = process.env.SEARCH_API_URL;
353
1586
  var SEARCH_API_KEY = process.env.SEARCH_API_KEY;
@@ -378,7 +1611,7 @@ var server = new Server(
378
1611
  }
379
1612
  }
380
1613
  );
381
- var searchConfig = {
1614
+ var searchDefaultConfig = {
382
1615
  limit: Number(LIMIT),
383
1616
  categories: CATEGORIES,
384
1617
  format: FORMAT,
@@ -392,7 +1625,8 @@ server.setRequestHandler(ListToolsRequestSchema, async () => ({
392
1625
  tools: [
393
1626
  SEARCH_TOOL,
394
1627
  EXTRACT_TOOL,
395
- SCRAPE_TOOL
1628
+ SCRAPE_TOOL,
1629
+ MAP_TOOL
396
1630
  ]
397
1631
  }));
398
1632
  server.setRequestHandler(CallToolRequestSchema, async (request) => {
@@ -413,10 +1647,9 @@ server.setRequestHandler(CallToolRequestSchema, async (request) => {
413
1647
  }
414
1648
  try {
415
1649
  const { results, success } = await processSearch({
416
- ...searchConfig,
417
1650
  ...args,
418
1651
  apiKey: SEARCH_API_KEY ?? "",
419
- apiUrl: SEARCH_API_URL ?? ""
1652
+ apiUrl: SEARCH_API_URL
420
1653
  });
421
1654
  if (!success) {
422
1655
  throw new Error("Failed to search");
@@ -490,6 +1723,34 @@ ${result.markdown ? `Content: ${result.markdown}` : ""}`);
490
1723
  };
491
1724
  }
492
1725
  }
1726
+ case "one_map": {
1727
+ if (!checkMapArgs(args)) {
1728
+ throw new Error(`Invalid arguments for tool: [${name}]`);
1729
+ }
1730
+ try {
1731
+ const { content, success, result } = await processMapUrl(args.url, args);
1732
+ return {
1733
+ content,
1734
+ result,
1735
+ success
1736
+ };
1737
+ } catch (error) {
1738
+ server.sendLoggingMessage({
1739
+ level: "error",
1740
+ data: `[${(/* @__PURE__ */ new Date()).toISOString()}] Error mapping: ${error}`
1741
+ });
1742
+ const msg = error instanceof Error ? error.message : String(error);
1743
+ return {
1744
+ success: false,
1745
+ content: [
1746
+ {
1747
+ type: "text",
1748
+ text: msg
1749
+ }
1750
+ ]
1751
+ };
1752
+ }
1753
+ }
493
1754
  default: {
494
1755
  throw new Error(`Unknown tool: ${name}`);
495
1756
  }
@@ -524,18 +1785,51 @@ ${result.markdown ? `Content: ${result.markdown}` : ""}`);
524
1785
  });
525
1786
  async function processSearch(args) {
526
1787
  switch (SEARCH_PROVIDER) {
527
- case "searxng":
528
- return await searxngSearch({
529
- ...searchConfig,
1788
+ case "searxng": {
1789
+ const params = {
1790
+ ...searchDefaultConfig,
530
1791
  ...args,
531
1792
  apiKey: SEARCH_API_KEY
532
- });
533
- case "tavily":
1793
+ };
1794
+ const { categories, language } = searchDefaultConfig;
1795
+ if (categories) {
1796
+ params.categories = categories;
1797
+ }
1798
+ if (language) {
1799
+ params.language = language;
1800
+ }
1801
+ return await searxngSearch(params);
1802
+ }
1803
+ case "tavily": {
534
1804
  return await tavilySearch({
535
- ...searchConfig,
1805
+ ...searchDefaultConfig,
536
1806
  ...args,
537
1807
  apiKey: SEARCH_API_KEY
538
1808
  });
1809
+ }
1810
+ case "bing": {
1811
+ return await bingSearch({
1812
+ ...searchDefaultConfig,
1813
+ ...args,
1814
+ apiKey: SEARCH_API_KEY
1815
+ });
1816
+ }
1817
+ case "duckduckgo": {
1818
+ const safeSearch = args.safeSearch ?? 0;
1819
+ const safeSearchOptions = [SafeSearchType2.STRICT, SafeSearchType2.MODERATE, SafeSearchType2.OFF];
1820
+ return await duckDuckGoSearch({
1821
+ ...searchDefaultConfig,
1822
+ ...args,
1823
+ apiKey: SEARCH_API_KEY,
1824
+ safeSearch: safeSearchOptions[safeSearch]
1825
+ });
1826
+ }
1827
+ case "local": {
1828
+ return await localSearch({
1829
+ ...searchDefaultConfig,
1830
+ ...args
1831
+ });
1832
+ }
539
1833
  default:
540
1834
  throw new Error(`Unsupported search provider: ${SEARCH_PROVIDER}`);
541
1835
  }
@@ -577,12 +1871,36 @@ async function processScrape(url2, args) {
577
1871
  success: true
578
1872
  };
579
1873
  }
1874
/**
 * Maps a site through `firecrawl.mapUrl` and wraps the discovered links in a
 * tool response.
 *
 * @param url2 - Starting URL for discovery
 * @param args - Tool arguments; a shallow copy is forwarded as the map parameters
 * @returns `{ content, result, success: true }`, where `content` carries the
 *   links as one newline-separated text item and `result` is the raw link list
 * @throws Error when the response has an `error` property or no `links`
 */
async function processMapUrl(url2, args) {
  const mapParams = { ...args };
  const response = await firecrawl.mapUrl(url2, mapParams);
  if ("error" in response) {
    throw new Error(`Failed to map: ${response.error}`);
  }
  const { links } = response;
  if (!links) {
    throw new Error(`No links found from: ${url2}`);
  }
  const textItem = { type: "text", text: links.join("\n").trim() };
  return { content: [textItem], result: links, success: true };
}
580
1895
  function checkSearchArgs(args) {
581
1896
  return typeof args === "object" && args !== null && "query" in args && typeof args.query === "string";
582
1897
  }
583
1898
  function checkScrapeArgs(args) {
584
1899
  return typeof args === "object" && args !== null && "url" in args && typeof args.url === "string";
585
1900
  }
1901
/**
 * Validates map tool arguments.
 *
 * @param args - Raw arguments received with the tool call
 * @returns `true` when `args` is a non-null object with a string `url`
 */
function checkMapArgs(args) {
  if (args === null || typeof args !== "object") return false;
  if (!("url" in args)) return false;
  return typeof args.url === "string";
}
586
1904
  async function runServer() {
587
1905
  try {
588
1906
  process.stdout.write("Starting OneSearch MCP server...\n");