one-search-mcp 1.0.5 → 1.0.9

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.cjs CHANGED
@@ -31,9 +31,109 @@ var import_server = require("@modelcontextprotocol/sdk/server/index.js");
31
31
  var import_types = require("@modelcontextprotocol/sdk/types.js");
32
32
  var import_stdio = require("@modelcontextprotocol/sdk/server/stdio.js");
33
33
 
34
- // src/search.ts
34
+ // src/search/bing.ts
35
/**
 * Run a web search against the Bing Web Search API and normalize the hits.
 *
 * @param {object} options
 * @param {string} options.query - Search terms, sent as the `q` parameter.
 * @param {number} [options.limit=10] - Results per page, sent as `count`.
 * @param {number} [options.safeSearch=0] - Index into ["Off", "Moderate", "Strict"];
 *   an out-of-range index leaves the `safeSearch` parameter off the request.
 * @param {number} [options.page=1] - 1-based page number, converted to `offset`.
 * @param {string} [options.apiUrl] - Endpoint to query.
 * @param {string} options.apiKey - Sent in the `Ocp-Apim-Subscription-Key` header.
 * @param {string} [options.language] - Sent as the `mkt` parameter when provided.
 * @returns {Promise<{results: object[], success: true}>} Normalized results
 *   (empty when the response carries no `webPages.value`).
 * @throws {Error} If `apiKey` is missing, the HTTP response is not OK, or the
 *   request / JSON decoding fails.
 */
async function bingSearch(options) {
  const {
    query,
    limit = 10,
    safeSearch = 0,
    page = 1,
    apiUrl = "https://api.bing.microsoft.com/v7.0/search",
    apiKey,
    language
  } = options;
  const bingSafeSearchOptions = ["Off", "Moderate", "Strict"];
  if (!apiKey) {
    throw new Error("Bing API key is required");
  }
  const searchOptions = {
    q: query,
    count: limit,
    // A page below 1 would otherwise yield a negative offset.
    offset: Math.max(0, (page - 1) * limit),
    mkt: language,
    safeSearch: bingSafeSearchOptions[safeSearch]
  };
  try {
    const queryParams = new URLSearchParams();
    for (const [key, value] of Object.entries(searchOptions)) {
      // Skip unset options instead of sending "undefined"/"null" (or crashing on null).
      if (value !== undefined && value !== null) {
        queryParams.set(key, String(value));
      }
    }
    const res = await fetch(`${apiUrl}?${queryParams}`, {
      method: "GET",
      headers: {
        "Content-Type": "application/json",
        "Ocp-Apim-Subscription-Key": apiKey
      }
    });
    if (!res.ok) {
      throw new Error(`Bing search error: ${res.status} ${res.statusText}`);
    }
    const data = await res.json();
    const serp = data.webPages?.value ?? [];
    const results = serp.map((item) => ({
      title: item.name,
      snippet: item.snippet,
      url: item.url,
      source: item.siteName,
      thumbnailUrl: item.thumbnailUrl,
      language: item.language,
      image: null,
      video: null,
      engine: "bing"
    }));
    return {
      results,
      success: true
    };
  } catch (err) {
    const msg = err instanceof Error ? err.message : "Bing search error.";
    // Diagnostics go to stderr: this file wires up the MCP stdio transport,
    // where stdout is reserved for protocol messages.
    process.stderr.write(`${msg}\n`);
    throw err;
  }
}
88
+
89
+ // src/search/duckduckgo.ts
90
+ var DDG = __toESM(require("duck-duck-scrape"), 1);
91
+ var import_async_retry = __toESM(require("async-retry"), 1);
92
/**
 * Search DuckDuckGo through the `duck-duck-scrape` client, retrying via
 * `async-retry`, and normalize the hits.
 *
 * @param {object} options
 * @param {string} options.query - Search terms.
 * @param {number} [options.timeout=10000] - Passed as `response_timeout` in the
 *   client's request options.
 * @param {*} [options.safeSearch=DDG.SafeSearchType.OFF] - Safe-search level.
 * @param {object} [options.retry={ retries: 3 }] - Options handed to `async-retry`.
 * @param {...*} [options.searchOptions] - Any remaining keys are forwarded to
 *   `DDG.search` unchanged.
 * @returns {Promise<{results: object[], success: true}>} Normalized results;
 *   empty when the client returns nothing or no `results` list.
 * @throws Whatever the client / retry wrapper throws (logged to stderr first).
 */
async function duckDuckGoSearch(options) {
  try {
    const {
      query,
      timeout = 1e4,
      safeSearch = DDG.SafeSearchType.OFF,
      retry = { retries: 3 },
      ...searchOptions
    } = options;
    const res = await (0, import_async_retry.default)(
      () => DDG.search(
        query,
        { ...searchOptions, safeSearch },
        {
          // needle options
          response_timeout: timeout
        }
      ),
      retry
    );
    // Tolerate both a missing response and a response without a `results` array.
    const hits = res?.results ?? [];
    return {
      results: hits.map((result) => ({
        title: result.title,
        snippet: result.description,
        url: result.url,
        source: result.hostname,
        image: null,
        video: null,
        engine: "duckduckgo"
      })),
      success: true
    };
  } catch (error) {
    const msg = error instanceof Error ? error.message : "DuckDuckGo search error.";
    // Diagnostics go to stderr: this file wires up the MCP stdio transport,
    // where stdout is reserved for protocol messages.
    process.stderr.write(`${msg}\n`);
    throw error;
  }
}
134
+
135
+ // src/search/searxng.ts
35
136
  var import_node_url = __toESM(require("url"), 1);
36
- var import_core = require("@tavily/core");
37
137
  async function searxngSearch(params) {
38
138
  try {
39
139
  const {
@@ -116,7 +216,9 @@ async function searxngSearch(params) {
116
216
  throw err;
117
217
  }
118
218
  }
119
- var tvly = null;
219
+
220
+ // src/search/tavily.ts
221
+ var import_core = require("@tavily/core");
120
222
  async function tavilySearch(options) {
121
223
  const {
122
224
  query,
@@ -128,27 +230,1104 @@ async function tavilySearch(options) {
128
230
  if (!apiKey) {
129
231
  throw new Error("Tavily API key is required");
130
232
  }
131
- if (!tvly) {
132
- tvly = (0, import_core.tavily)({
233
+ try {
234
+ const tvly = (0, import_core.tavily)({
133
235
  apiKey
134
236
  });
237
+ const params = {
238
+ topic: categories,
239
+ timeRange,
240
+ maxResults: limit
241
+ };
242
+ const res = await tvly.search(query, params);
243
+ const results = res.results.map((item) => ({
244
+ title: item.title,
245
+ url: item.url,
246
+ snippet: item.content,
247
+ engine: "tavily"
248
+ }));
249
+ return {
250
+ results,
251
+ success: true
252
+ };
253
+ } catch (error) {
254
+ const msg = error instanceof Error ? error.message : "Tavily search error.";
255
+ process.stdout.write(msg);
256
+ throw error;
135
257
  }
136
- const params = {
137
- topic: categories,
138
- timeRange,
139
- maxResults: limit
140
- };
141
- const res = await tvly.search(query, params);
142
- const results = res.results.map((item) => ({
143
- title: item.title,
144
- url: item.url,
145
- snippet: item.content
146
- }));
258
+ }
259
+
260
+ // src/libs/browser/types.ts
261
+ var import_puppeteer_core = require("puppeteer-core");
262
+
263
+ // src/libs/browser/finder.ts
264
+ var fs = __toESM(require("fs"), 1);
265
+ var path = __toESM(require("path"), 1);
266
+ var os = __toESM(require("os"), 1);
267
+ var import_logger = require("@agent-infra/logger");
268
/**
 * Locates an installed Chromium-family browser by probing a fixed list of
 * well-known executable paths for the current platform.
 */
var BrowserFinder = class {
  /**
   * Logger instance for diagnostic output
   */
  logger;
  /**
   * Creates a new BrowserFinder instance
   * @param {Logger} [logger] - Optional custom logger; falls back to the default logger
   */
  constructor(logger3) {
    this.logger = logger3 ?? import_logger.defaultLogger;
  }
  /**
   * Getter that returns the list of supported browsers with their platform-specific paths
   * @returns {Browser[]} Array of browser configurations
   * @private
   */
  get browsers() {
    const HOME_DIR = os.homedir();
    // LOCALAPPDATA is normally only set on Windows; fall back to the
    // conventional location so the Windows paths never start with "undefined".
    const LOCAL_APP_DATA = process.env.LOCALAPPDATA || path.win32.join(HOME_DIR, "AppData", "Local");
    return [
      {
        name: "Chromium",
        executable: {
          win32: "C:\\Program Files\\Chromium\\Application\\chrome.exe",
          darwin: "/Applications/Chromium.app/Contents/MacOS/Chromium",
          linux: "/usr/bin/chromium"
        },
        userDataDir: {
          win32: `${LOCAL_APP_DATA}\\Chromium\\User Data`,
          darwin: `${HOME_DIR}/Library/Application Support/Chromium`,
          linux: `${HOME_DIR}/.config/chromium`
        }
      },
      {
        name: "Google Chrome",
        executable: {
          win32: "C:\\Program Files\\Google\\Chrome\\Application\\chrome.exe",
          darwin: "/Applications/Google Chrome.app/Contents/MacOS/Google Chrome",
          linux: "/usr/bin/google-chrome"
        },
        userDataDir: {
          win32: `${LOCAL_APP_DATA}\\Google\\Chrome\\User Data`,
          darwin: `${HOME_DIR}/Library/Application Support/Google/Chrome`,
          linux: `${HOME_DIR}/.config/google-chrome`
        }
      },
      {
        name: "Google Chrome Canary",
        executable: {
          win32: "C:\\Program Files\\Google\\Chrome Canary\\Application\\chrome.exe",
          darwin: "/Applications/Google Chrome Canary.app/Contents/MacOS/Google Chrome Canary",
          linux: "/usr/bin/google-chrome-canary"
        },
        userDataDir: {
          win32: `${LOCAL_APP_DATA}\\Google\\Chrome Canary\\User Data`,
          darwin: `${HOME_DIR}/Library/Application Support/Google/Chrome Canary`,
          linux: `${HOME_DIR}/.config/google-chrome-canary`
        }
      }
    ];
  }
  /**
   * Find a specific browser or the first available browser
   * @param {string} [name] - Optional browser name to find
   * @returns {{ executable: string; userDataDir: string }} Browser executable and user data paths
   * @throws {Error} If no supported browser is found or the platform is unsupported
   */
  findBrowser(name) {
    const platform = process.platform;
    this.logger.info("Finding browser on platform:", platform);
    if (platform !== "darwin" && platform !== "win32" && platform !== "linux") {
      const error = new Error(`Unsupported platform: ${platform}`);
      this.logger.error(error.message);
      throw error;
    }
    const isInstalled = (candidate) => fs.existsSync(candidate.executable[platform]);
    const browser = this.browsers.find(
      (candidate) => (!name || candidate.name === name) && isInstalled(candidate)
    );
    this.logger.log("browser", browser);
    if (!browser) {
      // The message names the browsers this class actually probes for.
      const error = name ? new Error(`Cannot find browser: ${name}`) : new Error(
        "Cannot find a supported browser on your system. Please install Chromium, Google Chrome, or Google Chrome Canary."
      );
      this.logger.error(error.message);
      throw error;
    }
    const result = {
      executable: browser.executable[platform],
      userDataDir: browser.userDataDir[platform]
    };
    this.logger.success(`Found browser: ${browser.name}`);
    this.logger.info("Browser details:", result);
    return result;
  }
  /**
   * Get browser profiles for a specific browser
   * Reads the "Local State" file in the user data directory and lists the
   * entries of its `profile.info_cache` object
   * @param {string} [browserName] - Optional browser name to get profiles for
   * @returns {Array<{ displayName: string; path: string }>} Array of profile objects with
   *   display names and paths; empty when the file cannot be read or parsed
   * @throws {Error} If the browser itself cannot be found (see findBrowser)
   */
  getBrowserProfiles(browserName) {
    const browser = this.findBrowser(browserName);
    try {
      const localState = JSON.parse(
        fs.readFileSync(path.join(browser.userDataDir, "Local State"), "utf8")
      );
      const profileInfo = localState.profile.info_cache;
      return Object.entries(profileInfo).map(
        ([profileName, info]) => ({
          displayName: info.name,
          path: path.join(browser.userDataDir, profileName)
        })
      );
    } catch (error) {
      // Best effort: report why no profiles are returned instead of hiding it.
      this.logger.warn("Failed to read browser profiles:", error);
      return [];
    }
  }
  /**
   * Legacy method for backwards compatibility
   * Finds Chrome browser executable path
   * @deprecated Use findBrowser instead
   * @returns {string | null} Chrome executable path or null if not found
   */
  findChrome() {
    try {
      const { executable } = this.findBrowser("Google Chrome");
      return executable;
    } catch {
      return null;
    }
  }
};
401
+
402
+ // src/libs/browser/base.ts
403
+ var import_logger2 = require("@agent-infra/logger");
404
/**
 * Shared behavior for browser wrappers built on a Puppeteer browser instance:
 * tracks the most recently created page, evaluates functions on throwaway
 * pages, and closes the browser.
 */
var BaseBrowser = class {
  /**
   * The underlying Puppeteer browser instance
   * @protected
   */
  browser = null;
  /**
   * Logger instance for browser-related logging
   * @protected
   */
  logger;
  /**
   * Reference to the currently active browser page
   * @protected
   */
  activePage = null;
  /**
   * Creates an instance of BaseBrowser
   * @param {BaseBrowserOptions} [options] - Configuration options
   */
  constructor(options) {
    this.logger = options?.logger ?? import_logger2.defaultLogger;
    this.logger.info("Browser Options:", options);
  }
  /**
   * Get the underlying Puppeteer browser instance
   * @throws Error if browser is not launched
   * @returns {puppeteer.Browser} Puppeteer browser instance
   */
  getBrowser() {
    if (!this.browser) {
      throw new Error("Browser not launched");
    }
    return this.browser;
  }
  /**
   * Sets up listeners for browser page events
   * Tracks page creation and updates active page reference
   * @protected
   */
  async setupPageListener() {
    if (!this.browser) return;
    this.browser.on("targetcreated", async (target) => {
      // The emitter does not await this handler, so a rejection here would
      // surface as an unhandled rejection; catch and log it instead.
      try {
        const page = await target.page();
        if (!page) return;
        this.logger.info("New page created:", await page.url());
        this.activePage = page;
        const forgetPage = () => {
          if (this.activePage === page) {
            this.activePage = null;
          }
        };
        page.once("close", forgetPage);
        page.once("error", forgetPage);
      } catch (error) {
        this.logger.warn("Failed to track newly created page:", error);
      }
    });
  }
  /**
   * Closes the browser instance and cleans up resources
   * @returns {Promise<void>} Promise that resolves when browser is closed
   * @throws {Error} If browser fails to close properly
   */
  async close() {
    this.logger.info("Closing browser");
    try {
      await this.browser?.close();
      this.browser = null;
      // Pages of a closed browser are unusable; drop the stale reference.
      this.activePage = null;
      this.logger.success("Browser closed successfully");
    } catch (error) {
      this.logger.error("Failed to close browser:", error);
      throw error;
    }
  }
  /**
   * Creates a new page, navigates to the specified URL, executes a function in the page context, and returns the result
   * This method is inspired and modified from https://github.com/egoist/local-web-search/blob/04608ed09aa103e2fff6402c72ca12edfb692d19/src/browser.ts#L74
   * @template T - Type of parameters passed to the page function
   * @template R - Return type of the page function
   * @param {EvaluateOnNewPageOptions<T, R>} options - Configuration options for the page evaluation.
   *   `pageFunctionParams` may be omitted, in which case the page function receives only the window handle.
   * @returns {Promise<R | null>} Promise resolving to the result of the page function or null
   * @throws {Error} If the browser is not launched, or page creation or evaluation fails
   */
  async evaluateOnNewPage(options) {
    const {
      url: url2,
      pageFunction,
      pageFunctionParams,
      beforePageLoad,
      afterPageLoad,
      beforeSendResult,
      waitForOptions
    } = options;
    if (!this.browser) {
      throw new Error("Browser not launched");
    }
    const page = await this.browser.newPage();
    let windowHandle = null;
    try {
      await beforePageLoad?.(page);
      await page.goto(url2, {
        waitUntil: "networkidle2",
        ...waitForOptions
      });
      await afterPageLoad?.(page);
      windowHandle = await page.evaluateHandle(() => window);
      const result = await page.evaluate(
        pageFunction,
        windowHandle,
        ...pageFunctionParams ?? []
      );
      await beforeSendResult?.(page, result);
      return result;
    } finally {
      // Always release the handle and the page. Cleanup failures are logged
      // rather than thrown so they cannot mask the original error or result.
      try {
        await windowHandle?.dispose();
      } catch (cleanupError) {
        this.logger.warn("Failed to dispose window handle:", cleanupError);
      }
      try {
        await page.close();
      } catch (cleanupError) {
        this.logger.warn("Failed to close page:", cleanupError);
      }
    }
  }
  /**
   * Creates a new browser page
   * @returns {Promise<Page>} Promise resolving to the newly created page
   * @throws {Error} If browser is not launched or page creation fails
   */
  async createPage() {
    if (!this.browser) {
      this.logger.error("No active browser");
      throw new Error("Browser not launched");
    }
    const page = await this.browser.newPage();
    return page;
  }
  /**
   * Gets the currently active page or finds an active page if none is currently tracked
   * If no active pages exist, creates a new page
   * @returns {Promise<Page>} Promise resolving to the active page
   * @throws {Error} If browser is not launched or no active page can be found/created
   */
  async getActivePage() {
    if (!this.browser) {
      throw new Error("Browser not launched");
    }
    if (this.activePage) {
      try {
        // Probe the page: evaluate rejects if it is no longer usable.
        await this.activePage.evaluate(() => document.readyState);
        return this.activePage;
      } catch (e) {
        this.logger.warn("Active page no longer available:", e);
        this.activePage = null;
      }
    }
    const pages = await this.browser.pages();
    if (pages.length === 0) {
      this.activePage = await this.createPage();
      return this.activePage;
    }
    // Walk from the end of the list and take the first page that responds.
    for (let i = pages.length - 1; i >= 0; i--) {
      const page = pages[i];
      try {
        await page.evaluate(() => document.readyState);
        this.activePage = page;
        return page;
      } catch (e) {
        continue;
      }
    }
    throw new Error("No active page found");
  }
};
573
+
574
+ // src/libs/browser/local.ts
575
+ var puppeteer = __toESM(require("puppeteer-core"), 1);
576
/**
 * Browser wrapper that starts a locally installed browser through
 * `puppeteer.launch`.
 */
var LocalBrowser = class extends BaseBrowser {
  /**
   * Used to locate an installed browser when the caller gives no executable path
   * @private
   */
  browserFinder = new BrowserFinder();
  /**
   * Start a local browser and store it on `this.browser`
   * When `options.executablePath` is not set, the first browser found by the
   * browser finder is used
   * @param {LaunchOptions} options - Launch settings (executablePath, headless,
   *   defaultViewport, proxy, profilePath, timeout)
   * @returns {Promise<void>} Resolves once the browser is running and the page listener is installed
   * @throws {Error} If no executable can be found or the launch fails
   */
  async launch(options = {}) {
    this.logger.info("Launching browser with options:", options);
    const executablePath = options?.executablePath || this.browserFinder.findBrowser().executable;
    this.logger.info("Using executable path:", executablePath);
    const width = options?.defaultViewport?.width ?? 1280;
    const height = options?.defaultViewport?.height ?? 800;
    const launchArgs = [
      "--no-sandbox",
      "--mute-audio",
      "--disable-gpu",
      "--disable-http2",
      "--disable-blink-features=AutomationControlled",
      "--disable-infobars",
      "--disable-background-timer-throttling",
      "--disable-popup-blocking",
      "--disable-backgrounding-occluded-windows",
      "--disable-renderer-backgrounding",
      "--disable-window-activation",
      "--disable-focus-on-load",
      // disable default browser check
      "--no-default-browser-check",
      // disable CORS
      "--disable-web-security",
      "--disable-features=IsolateOrigins,site-per-process",
      "--disable-site-isolation-trials",
      // the window is made 90px taller than the viewport
      `--window-size=${width},${height + 90}`
    ];
    // Optional flags are appended only when the matching option is set.
    if (options?.proxy) {
      launchArgs.push(`--proxy-server=${options.proxy}`);
    }
    if (options?.profilePath) {
      launchArgs.push(`--profile-directory=${options.profilePath}`);
    }
    const puppeteerLaunchOptions = {
      executablePath,
      headless: options?.headless ?? false,
      defaultViewport: { width, height },
      args: launchArgs,
      ignoreDefaultArgs: ["--enable-automation"],
      timeout: options.timeout ?? 0,
      downloadBehavior: { policy: "deny" }
    };
    this.logger.info("Launch options:", puppeteerLaunchOptions);
    try {
      this.browser = await puppeteer.launch(puppeteerLaunchOptions);
      await this.setupPageListener();
      this.logger.success("Browser launched successfully");
    } catch (launchError) {
      this.logger.error("Failed to launch browser:", launchError);
      throw launchError;
    }
  }
};
642
+
643
+ // src/libs/browser/remote.ts
644
+ var puppeteer2 = __toESM(require("puppeteer-core"), 1);
645
+
646
+ // src/libs/browser-search/readability.ts
647
+ var READABILITY_SCRIPT = 'function q(t,e){if(e&&e.documentElement)t=e,e=arguments[2];else if(!t||!t.documentElement)throw new Error("First argument to Readability constructor should be a document object.");if(e=e||{},this._doc=t,this._docJSDOMParser=this._doc.firstChild.__JSDOMParser__,this._articleTitle=null,this._articleByline=null,this._articleDir=null,this._articleSiteName=null,this._attempts=[],this._debug=!!e.debug,this._maxElemsToParse=e.maxElemsToParse||this.DEFAULT_MAX_ELEMS_TO_PARSE,this._nbTopCandidates=e.nbTopCandidates||this.DEFAULT_N_TOP_CANDIDATES,this._charThreshold=e.charThreshold||this.DEFAULT_CHAR_THRESHOLD,this._classesToPreserve=this.CLASSES_TO_PRESERVE.concat(e.classesToPreserve||[]),this._keepClasses=!!e.keepClasses,this._serializer=e.serializer||function(i){return i.innerHTML},this._disableJSONLD=!!e.disableJSONLD,this._allowedVideoRegex=e.allowedVideoRegex||this.REGEXPS.videos,this._flags=this.FLAG_STRIP_UNLIKELYS|this.FLAG_WEIGHT_CLASSES|this.FLAG_CLEAN_CONDITIONALLY,this._debug){let i=function(r){if(r.nodeType==r.TEXT_NODE)return`${r.nodeName} ("${r.textContent}")`;let l=Array.from(r.attributes||[],function(a){return`${a.name}="${a.value}"`}).join(" ");return`<${r.localName} ${l}>`};this.log=function(){if(typeof console!="undefined"){let l=Array.from(arguments,a=>a&&a.nodeType==this.ELEMENT_NODE?i(a):a);l.unshift("Reader: (Readability)"),console.log.apply(console,l)}else if(typeof dump!="undefined"){var r=Array.prototype.map.call(arguments,function(l){return l&&l.nodeName?i(l):l}).join(" ");dump("Reader: (Readability) "+r+`\n`)}}}else 
this.log=function(){}}q.prototype={FLAG_STRIP_UNLIKELYS:1,FLAG_WEIGHT_CLASSES:2,FLAG_CLEAN_CONDITIONALLY:4,ELEMENT_NODE:1,TEXT_NODE:3,DEFAULT_MAX_ELEMS_TO_PARSE:0,DEFAULT_N_TOP_CANDIDATES:5,DEFAULT_TAGS_TO_SCORE:"section,h2,h3,h4,h5,h6,p,td,pre".toUpperCase().split(","),DEFAULT_CHAR_THRESHOLD:500,REGEXPS:{unlikelyCandidates:/-ad-|ai2html|banner|breadcrumbs|combx|comment|community|cover-wrap|disqus|extra|footer|gdpr|header|legends|menu|related|remark|replies|rss|shoutbox|sidebar|skyscraper|social|sponsor|supplemental|ad-break|agegate|pagination|pager|popup|yom-remote/i,okMaybeItsACandidate:/and|article|body|column|content|main|shadow/i,positive:/article|body|content|entry|hentry|h-entry|main|page|pagination|post|text|blog|story/i,negative:/-ad-|hidden|^hid$| hid$| hid |^hid |banner|combx|comment|com-|contact|foot|footer|footnote|gdpr|masthead|media|meta|outbrain|promo|related|scroll|share|shoutbox|sidebar|skyscraper|sponsor|shopping|tags|tool|widget/i,extraneous:/print|archive|comment|discuss|e[\\-]?mail|share|reply|all|login|sign|single|utility/i,byline:/byline|author|dateline|writtenby|p-author/i,replaceFonts:/<(\\/?)font[^>]*>/gi,normalize:/\\s{2,}/g,videos:/\\/\\/(www\\.)?((dailymotion|youtube|youtube-nocookie|player\\.vimeo|v\\.qq)\\.com|(archive|upload\\.wikimedia)\\.org|player\\.twitch\\.tv)/i,shareElements:/(\\b|_)(share|sharedaddy)(\\b|_)/i,nextLink:/(next|weiter|continue|>([^\\|]|$)|\xBB([^\\|]|$))/i,prevLink:/(prev|earl|old|new|<|\xAB)/i,tokenize:/\\W+/g,whitespace:/^\\s*$/,hasContent:/\\S$/,hashUrl:/^#.+/,srcsetUrl:/(\\S+)(\\s+[\\d.]+[xw])?(\\s*(?:,|$))/g,b64DataUrl:/^data:\\s*([^\\s;,]+)\\s*;\\s*base64\\s*,/i,commas:/\\u002C|\\u060C|\\uFE50|\\uFE10|\\uFE11|\\u2E41|\\u2E34|\\u2E32|\\uFF0C/g,jsonLdArticleTypes:/^Article|AdvertiserContentArticle|NewsArticle|AnalysisNewsArticle|AskPublicNewsArticle|BackgroundNewsArticle|OpinionNewsArticle|ReportageNewsArticle|ReviewNewsArticle|Report|SatiricalArticle|ScholarlyArticle|MedicalScholarlyArticle|SocialMediaPostin
g|BlogPosting|LiveBlogPosting|DiscussionForumPosting|TechArticle|APIReference$/},UNLIKELY_ROLES:["menu","menubar","complementary","navigation","alert","alertdialog","dialog"],DIV_TO_P_ELEMS:new Set(["BLOCKQUOTE","DL","DIV","IMG","OL","P","PRE","TABLE","UL"]),ALTER_TO_DIV_EXCEPTIONS:["DIV","ARTICLE","SECTION","P"],PRESENTATIONAL_ATTRIBUTES:["align","background","bgcolor","border","cellpadding","cellspacing","frame","hspace","rules","style","valign","vspace"],DEPRECATED_SIZE_ATTRIBUTE_ELEMS:["TABLE","TH","TD","HR","PRE"],PHRASING_ELEMS:["ABBR","AUDIO","B","BDO","BR","BUTTON","CITE","CODE","DATA","DATALIST","DFN","EM","EMBED","I","IMG","INPUT","KBD","LABEL","MARK","MATH","METER","NOSCRIPT","OBJECT","OUTPUT","PROGRESS","Q","RUBY","SAMP","SCRIPT","SELECT","SMALL","SPAN","STRONG","SUB","SUP","TEXTAREA","TIME","VAR","WBR"],CLASSES_TO_PRESERVE:["page"],HTML_ESCAPE_MAP:{lt:"<",gt:">",amp:"&",quot:\'"\',apos:"\'"},_postProcessContent:function(t){this._fixRelativeUris(t),this._simplifyNestedElements(t),this._keepClasses||this._cleanClasses(t)},_removeNodes:function(t,e){if(this._docJSDOMParser&&t._isLiveNodeList)throw new Error("Do not pass live node lists to _removeNodes");for(var i=t.length-1;i>=0;i--){var r=t[i],l=r.parentNode;l&&(!e||e.call(this,r,i,t))&&l.removeChild(r)}},_replaceNodeTags:function(t,e){if(this._docJSDOMParser&&t._isLiveNodeList)throw new Error("Do not pass live node lists to _replaceNodeTags");for(let i of t)this._setNodeTag(i,e)},_forEachNode:function(t,e){Array.prototype.forEach.call(t,e,this)},_findNode:function(t,e){return Array.prototype.find.call(t,e,this)},_someNode:function(t,e){return Array.prototype.some.call(t,e,this)},_everyNode:function(t,e){return Array.prototype.every.call(t,e,this)},_concatNodeLists:function(){var t=Array.prototype.slice,e=t.call(arguments),i=e.map(function(r){return t.call(r)});return Array.prototype.concat.apply([],i)},_getAllNodesWithTag:function(t,e){return 
t.querySelectorAll?t.querySelectorAll(e.join(",")):[].concat.apply([],e.map(function(i){var r=t.getElementsByTagName(i);return Array.isArray(r)?r:Array.from(r)}))},_cleanClasses:function(t){var e=this._classesToPreserve,i=(t.getAttribute("class")||"").split(/\\s+/).filter(function(r){return e.indexOf(r)!=-1}).join(" ");for(i?t.setAttribute("class",i):t.removeAttribute("class"),t=t.firstElementChild;t;t=t.nextElementSibling)this._cleanClasses(t)},_fixRelativeUris:function(t){var e=this._doc.baseURI,i=this._doc.documentURI;function r(s){if(e==i&&s.charAt(0)=="#")return s;try{return new URL(s,e).href}catch(h){}return s}var l=this._getAllNodesWithTag(t,["a"]);this._forEachNode(l,function(s){var h=s.getAttribute("href");if(h)if(h.indexOf("javascript:")===0)if(s.childNodes.length===1&&s.childNodes[0].nodeType===this.TEXT_NODE){var c=this._doc.createTextNode(s.textContent);s.parentNode.replaceChild(c,s)}else{for(var n=this._doc.createElement("span");s.firstChild;)n.appendChild(s.firstChild);s.parentNode.replaceChild(n,s)}else s.setAttribute("href",r(h))});var a=this._getAllNodesWithTag(t,["img","picture","figure","video","audio","source"]);this._forEachNode(a,function(s){var h=s.getAttribute("src"),c=s.getAttribute("poster"),n=s.getAttribute("srcset");if(h&&s.setAttribute("src",r(h)),c&&s.setAttribute("poster",r(c)),n){var u=n.replace(this.REGEXPS.srcsetUrl,function(m,b,N,v){return r(b)+(N||"")+v});s.setAttribute("srcset",u)}})},_simplifyNestedElements:function(t){for(var e=t;e;){if(e.parentNode&&["DIV","SECTION"].includes(e.tagName)&&!(e.id&&e.id.startsWith("readability"))){if(this._isElementWithoutContent(e)){e=this._removeAndGetNext(e);continue}else if(this._hasSingleTagInsideElement(e,"DIV")||this._hasSingleTagInsideElement(e,"SECTION")){for(var i=e.children[0],r=0;r<e.attributes.length;r++)i.setAttribute(e.attributes[r].name,e.attributes[r].value);e.parentNode.replaceChild(i,e),e=i;continue}}e=this._getNextNode(e)}},_getArticleTitle:function(){var 
t=this._doc,e="",i="";try{e=i=t.title.trim(),typeof e!="string"&&(e=i=this._getInnerText(t.getElementsByTagName("title")[0]))}catch(u){}var r=!1;function l(u){return u.split(/\\s+/).length}if(/ [\\|\\-\\\\\\/>\xBB] /.test(e))r=/ [\\\\\\/>\xBB] /.test(e),e=i.replace(/(.*)[\\|\\-\\\\\\/>\xBB] .*/gi,"$1"),l(e)<3&&(e=i.replace(/[^\\|\\-\\\\\\/>\xBB]*[\\|\\-\\\\\\/>\xBB](.*)/gi,"$1"));else if(e.indexOf(": ")!==-1){var a=this._concatNodeLists(t.getElementsByTagName("h1"),t.getElementsByTagName("h2")),s=e.trim(),h=this._someNode(a,function(u){return u.textContent.trim()===s});h||(e=i.substring(i.lastIndexOf(":")+1),l(e)<3?e=i.substring(i.indexOf(":")+1):l(i.substr(0,i.indexOf(":")))>5&&(e=i))}else if(e.length>150||e.length<15){var c=t.getElementsByTagName("h1");c.length===1&&(e=this._getInnerText(c[0]))}e=e.trim().replace(this.REGEXPS.normalize," ");var n=l(e);return n<=4&&(!r||n!=l(i.replace(/[\\|\\-\\\\\\/>\xBB]+/g,""))-1)&&(e=i),e},_prepDocument:function(){var t=this._doc;this._removeNodes(this._getAllNodesWithTag(t,["style"])),t.body&&this._replaceBrs(t.body),this._replaceNodeTags(this._getAllNodesWithTag(t,["font"]),"SPAN")},_nextNode:function(t){for(var e=t;e&&e.nodeType!=this.ELEMENT_NODE&&this.REGEXPS.whitespace.test(e.textContent);)e=e.nextSibling;return e},_replaceBrs:function(t){this._forEachNode(this._getAllNodesWithTag(t,["br"]),function(e){for(var i=e.nextSibling,r=!1;(i=this._nextNode(i))&&i.tagName=="BR";){r=!0;var l=i.nextSibling;i.parentNode.removeChild(i),i=l}if(r){var a=this._doc.createElement("p");for(e.parentNode.replaceChild(a,e),i=a.nextSibling;i;){if(i.tagName=="BR"){var s=this._nextNode(i.nextSibling);if(s&&s.tagName=="BR")break}if(!this._isPhrasingContent(i))break;var h=i.nextSibling;a.appendChild(i),i=h}for(;a.lastChild&&this._isWhitespace(a.lastChild);)a.removeChild(a.lastChild);a.parentNode.tagName==="P"&&this._setNodeTag(a.parentNode,"DIV")}})},_setNodeTag:function(t,e){if(this.log("_setNodeTag",t,e),this._docJSDOMParser)return 
t.localName=e.toLowerCase(),t.tagName=e.toUpperCase(),t;for(var i=t.ownerDocument.createElement(e);t.firstChild;)i.appendChild(t.firstChild);t.parentNode.replaceChild(i,t),t.readability&&(i.readability=t.readability);for(var r=0;r<t.attributes.length;r++)try{i.setAttribute(t.attributes[r].name,t.attributes[r].value)}catch(l){}return i},_prepArticle:function(t){this._cleanStyles(t),this._markDataTables(t),this._fixLazyImages(t),this._cleanConditionally(t,"form"),this._cleanConditionally(t,"fieldset"),this._clean(t,"object"),this._clean(t,"embed"),this._clean(t,"footer"),this._clean(t,"link"),this._clean(t,"aside");var e=this.DEFAULT_CHAR_THRESHOLD;this._forEachNode(t.children,function(i){this._cleanMatchedNodes(i,function(r,l){return this.REGEXPS.shareElements.test(l)&&r.textContent.length<e})}),this._clean(t,"iframe"),this._clean(t,"input"),this._clean(t,"textarea"),this._clean(t,"select"),this._clean(t,"button"),this._cleanHeaders(t),this._cleanConditionally(t,"table"),this._cleanConditionally(t,"ul"),this._cleanConditionally(t,"div"),this._replaceNodeTags(this._getAllNodesWithTag(t,["h1"]),"h2"),this._removeNodes(this._getAllNodesWithTag(t,["p"]),function(i){var r=i.getElementsByTagName("img").length,l=i.getElementsByTagName("embed").length,a=i.getElementsByTagName("object").length,s=i.getElementsByTagName("iframe").length,h=r+l+a+s;return h===0&&!this._getInnerText(i,!1)}),this._forEachNode(this._getAllNodesWithTag(t,["br"]),function(i){var r=this._nextNode(i.nextSibling);r&&r.tagName=="P"&&i.parentNode.removeChild(i)}),this._forEachNode(this._getAllNodesWithTag(t,["table"]),function(i){var r=this._hasSingleTagInsideElement(i,"TBODY")?i.firstElementChild:i;if(this._hasSingleTagInsideElement(r,"TR")){var l=r.firstElementChild;if(this._hasSingleTagInsideElement(l,"TD")){var 
a=l.firstElementChild;a=this._setNodeTag(a,this._everyNode(a.childNodes,this._isPhrasingContent)?"P":"DIV"),i.parentNode.replaceChild(a,i)}}})},_initializeNode:function(t){switch(t.readability={contentScore:0},t.tagName){case"DIV":t.readability.contentScore+=5;break;case"PRE":case"TD":case"BLOCKQUOTE":t.readability.contentScore+=3;break;case"ADDRESS":case"OL":case"UL":case"DL":case"DD":case"DT":case"LI":case"FORM":t.readability.contentScore-=3;break;case"H1":case"H2":case"H3":case"H4":case"H5":case"H6":case"TH":t.readability.contentScore-=5;break}t.readability.contentScore+=this._getClassWeight(t)},_removeAndGetNext:function(t){var e=this._getNextNode(t,!0);return t.parentNode.removeChild(t),e},_getNextNode:function(t,e){if(!e&&t.firstElementChild)return t.firstElementChild;if(t.nextElementSibling)return t.nextElementSibling;do t=t.parentNode;while(t&&!t.nextElementSibling);return t&&t.nextElementSibling},_textSimilarity:function(t,e){var i=t.toLowerCase().split(this.REGEXPS.tokenize).filter(Boolean),r=e.toLowerCase().split(this.REGEXPS.tokenize).filter(Boolean);if(!i.length||!r.length)return 0;var l=r.filter(s=>!i.includes(s)),a=l.join(" ").length/r.join(" ").length;return 1-a},_checkByline:function(t,e){if(this._articleByline)return!1;if(t.getAttribute!==void 0)var i=t.getAttribute("rel"),r=t.getAttribute("itemprop");return(i==="author"||r&&r.indexOf("author")!==-1||this.REGEXPS.byline.test(e))&&this._isValidByline(t.textContent)?(this._articleByline=t.textContent.trim(),!0):!1},_getNodeAncestors:function(t,e){e=e||0;for(var i=0,r=[];t.parentNode&&(r.push(t.parentNode),!(e&&++i===e));)t=t.parentNode;return r},_grabArticle:function(t){this.log("**** grabArticle ****");var e=this._doc,i=t!==null;if(t=t||this._doc.body,!t)return this.log("No body found in document. 
Abort."),null;for(var r=t.innerHTML;;){this.log("Starting grabArticle loop");var l=this._flagIsActive(this.FLAG_STRIP_UNLIKELYS),a=[],s=this._doc.documentElement;let J=!0;for(;s;){s.tagName==="HTML"&&(this._articleLang=s.getAttribute("lang"));var h=s.className+" "+s.id;if(!this._isProbablyVisible(s)){this.log("Removing hidden node - "+h),s=this._removeAndGetNext(s);continue}if(s.getAttribute("aria-modal")=="true"&&s.getAttribute("role")=="dialog"){s=this._removeAndGetNext(s);continue}if(this._checkByline(s,h)){s=this._removeAndGetNext(s);continue}if(J&&this._headerDuplicatesTitle(s)){this.log("Removing header: ",s.textContent.trim(),this._articleTitle.trim()),J=!1,s=this._removeAndGetNext(s);continue}if(l){if(this.REGEXPS.unlikelyCandidates.test(h)&&!this.REGEXPS.okMaybeItsACandidate.test(h)&&!this._hasAncestorTag(s,"table")&&!this._hasAncestorTag(s,"code")&&s.tagName!=="BODY"&&s.tagName!=="A"){this.log("Removing unlikely candidate - "+h),s=this._removeAndGetNext(s);continue}if(this.UNLIKELY_ROLES.includes(s.getAttribute("role"))){this.log("Removing content with role "+s.getAttribute("role")+" - "+h),s=this._removeAndGetNext(s);continue}}if((s.tagName==="DIV"||s.tagName==="SECTION"||s.tagName==="HEADER"||s.tagName==="H1"||s.tagName==="H2"||s.tagName==="H3"||s.tagName==="H4"||s.tagName==="H5"||s.tagName==="H6")&&this._isElementWithoutContent(s)){s=this._removeAndGetNext(s);continue}if(this.DEFAULT_TAGS_TO_SCORE.indexOf(s.tagName)!==-1&&a.push(s),s.tagName==="DIV"){for(var c=null,n=s.firstChild;n;){var u=n.nextSibling;if(this._isPhrasingContent(n))c!==null?c.appendChild(n):this._isWhitespace(n)||(c=e.createElement("p"),s.replaceChild(c,n),c.appendChild(n));else if(c!==null){for(;c.lastChild&&this._isWhitespace(c.lastChild);)c.removeChild(c.lastChild);c=null}n=u}if(this._hasSingleTagInsideElement(s,"P")&&this._getLinkDensity(s)<.25){var m=s.children[0];s.parentNode.replaceChild(m,s),s=m,a.push(s)}else 
this._hasChildBlockElement(s)||(s=this._setNodeTag(s,"P"),a.push(s))}s=this._getNextNode(s)}var b=[];this._forEachNode(a,function(A){if(!(!A.parentNode||typeof A.parentNode.tagName=="undefined")){var T=this._getInnerText(A);if(!(T.length<25)){var K=this._getNodeAncestors(A,5);if(K.length!==0){var C=0;C+=1,C+=T.split(this.REGEXPS.commas).length,C+=Math.min(Math.floor(T.length/100),3),this._forEachNode(K,function(S,F){if(!(!S.tagName||!S.parentNode||typeof S.parentNode.tagName=="undefined")){if(typeof S.readability=="undefined"&&(this._initializeNode(S),b.push(S)),F===0)var X=1;else F===1?X=2:X=F*3;S.readability.contentScore+=C/X}})}}}});for(var N=[],v=0,y=b.length;v<y;v+=1){var E=b[v],d=E.readability.contentScore*(1-this._getLinkDensity(E));E.readability.contentScore=d,this.log("Candidate:",E,"with score "+d);for(var p=0;p<this._nbTopCandidates;p++){var x=N[p];if(!x||d>x.readability.contentScore){N.splice(p,0,E),N.length>this._nbTopCandidates&&N.pop();break}}}var o=N[0]||null,L=!1,g;if(o===null||o.tagName==="BODY"){for(o=e.createElement("DIV"),L=!0;t.firstChild;)this.log("Moving child out:",t.firstChild),o.appendChild(t.firstChild);t.appendChild(o),this._initializeNode(o)}else if(o){for(var I=[],P=1;P<N.length;P++)N[P].readability.contentScore/o.readability.contentScore>=.75&&I.push(this._getNodeAncestors(N[P]));var O=3;if(I.length>=O)for(g=o.parentNode;g.tagName!=="BODY";){for(var G=0,H=0;H<I.length&&G<O;H++)G+=Number(I[H].includes(g));if(G>=O){o=g;break}g=g.parentNode}o.readability||this._initializeNode(o),g=o.parentNode;for(var M=o.readability.contentScore,Q=M/3;g.tagName!=="BODY";){if(!g.readability){g=g.parentNode;continue}var V=g.readability.contentScore;if(V<Q)break;if(V>M){o=g;break}M=g.readability.contentScore,g=g.parentNode}for(g=o.parentNode;g.tagName!="BODY"&&g.children.length==1;)o=g,g=o.parentNode;o.readability||this._initializeNode(o)}var _=e.createElement("DIV");i&&(_.id="readability-content");var 
Z=Math.max(10,o.readability.contentScore*.2);g=o.parentNode;for(var U=g.children,w=0,j=U.length;w<j;w++){var f=U[w],R=!1;if(this.log("Looking at sibling node:",f,f.readability?"with score "+f.readability.contentScore:""),this.log("Sibling has score",f.readability?f.readability.contentScore:"Unknown"),f===o)R=!0;else{var $=0;if(f.className===o.className&&o.className!==""&&($+=o.readability.contentScore*.2),f.readability&&f.readability.contentScore+$>=Z)R=!0;else if(f.nodeName==="P"){var Y=this._getLinkDensity(f),z=this._getInnerText(f),k=z.length;(k>80&&Y<.25||k<80&&k>0&&Y===0&&z.search(/\\.( |$)/)!==-1)&&(R=!0)}}R&&(this.log("Appending node:",f),this.ALTER_TO_DIV_EXCEPTIONS.indexOf(f.nodeName)===-1&&(this.log("Altering sibling:",f,"to div."),f=this._setNodeTag(f,"DIV")),_.appendChild(f),U=g.children,w-=1,j-=1)}if(this._debug&&this.log("Article content pre-prep: "+_.innerHTML),this._prepArticle(_),this._debug&&this.log("Article content post-prep: "+_.innerHTML),L)o.id="readability-page-1",o.className="page";else{var B=e.createElement("DIV");for(B.id="readability-page-1",B.className="page";_.firstChild;)B.appendChild(_.firstChild);_.appendChild(B)}this._debug&&this.log("Article content after paging: "+_.innerHTML);var W=!0,D=this._getInnerText(_,!0).length;if(D<this._charThreshold)if(W=!1,t.innerHTML=r,this._flagIsActive(this.FLAG_STRIP_UNLIKELYS))this._removeFlag(this.FLAG_STRIP_UNLIKELYS),this._attempts.push({articleContent:_,textLength:D});else if(this._flagIsActive(this.FLAG_WEIGHT_CLASSES))this._removeFlag(this.FLAG_WEIGHT_CLASSES),this._attempts.push({articleContent:_,textLength:D});else if(this._flagIsActive(this.FLAG_CLEAN_CONDITIONALLY))this._removeFlag(this.FLAG_CLEAN_CONDITIONALLY),this._attempts.push({articleContent:_,textLength:D});else{if(this._attempts.push({articleContent:_,textLength:D}),this._attempts.sort(function(A,T){return T.textLength-A.textLength}),!this._attempts[0].textLength)return null;_=this._attempts[0].articleContent,W=!0}if(W){var 
tt=[g,o].concat(this._getNodeAncestors(g));return this._someNode(tt,function(A){if(!A.tagName)return!1;var T=A.getAttribute("dir");return T?(this._articleDir=T,!0):!1}),_}}},_isValidByline:function(t){return typeof t=="string"||t instanceof String?(t=t.trim(),t.length>0&&t.length<100):!1},_unescapeHtmlEntities:function(t){if(!t)return t;var e=this.HTML_ESCAPE_MAP;return t.replace(/&(quot|amp|apos|lt|gt);/g,function(i,r){return e[r]}).replace(/&#(?:x([0-9a-z]{1,4})|([0-9]{1,4}));/gi,function(i,r,l){var a=parseInt(r||l,r?16:10);return String.fromCharCode(a)})},_getJSONLD:function(t){var e=this._getAllNodesWithTag(t,["script"]),i;return this._forEachNode(e,function(r){if(!i&&r.getAttribute("type")==="application/ld+json")try{var l=r.textContent.replace(/^\\s*<!\\[CDATA\\[|\\]\\]>\\s*$/g,""),a=JSON.parse(l);if(!a["@context"]||!a["@context"].match(/^https?\\:\\/\\/schema\\.org$/)||(!a["@type"]&&Array.isArray(a["@graph"])&&(a=a["@graph"].find(function(n){return(n["@type"]||"").match(this.REGEXPS.jsonLdArticleTypes)})),!a||!a["@type"]||!a["@type"].match(this.REGEXPS.jsonLdArticleTypes)))return;if(i={},typeof a.name=="string"&&typeof a.headline=="string"&&a.name!==a.headline){var s=this._getArticleTitle(),h=this._textSimilarity(a.name,s)>.75,c=this._textSimilarity(a.headline,s)>.75;c&&!h?i.title=a.headline:i.title=a.name}else typeof a.name=="string"?i.title=a.name.trim():typeof a.headline=="string"&&(i.title=a.headline.trim());a.author&&(typeof a.author.name=="string"?i.byline=a.author.name.trim():Array.isArray(a.author)&&a.author[0]&&typeof a.author[0].name=="string"&&(i.byline=a.author.filter(function(n){return n&&typeof n.name=="string"}).map(function(n){return n.name.trim()}).join(", "))),typeof a.description=="string"&&(i.excerpt=a.description.trim()),a.publisher&&typeof a.publisher.name=="string"&&(i.siteName=a.publisher.name.trim()),typeof 
a.datePublished=="string"&&(i.datePublished=a.datePublished.trim());return}catch(n){this.log(n.message)}}),i||{}},_getArticleMetadata:function(t){var e={},i={},r=this._doc.getElementsByTagName("meta"),l=/\\s*(article|dc|dcterm|og|twitter)\\s*:\\s*(author|creator|description|published_time|title|site_name)\\s*/gi,a=/^\\s*(?:(dc|dcterm|og|twitter|weibo:(article|webpage))\\s*[\\.:]\\s*)?(author|creator|description|title|site_name)\\s*$/i;return this._forEachNode(r,function(s){var h=s.getAttribute("name"),c=s.getAttribute("property"),n=s.getAttribute("content");if(n){var u=null,m=null;c&&(u=c.match(l),u&&(m=u[0].toLowerCase().replace(/\\s/g,""),i[m]=n.trim())),!u&&h&&a.test(h)&&(m=h,n&&(m=m.toLowerCase().replace(/\\s/g,"").replace(/\\./g,":"),i[m]=n.trim()))}}),e.title=t.title||i["dc:title"]||i["dcterm:title"]||i["og:title"]||i["weibo:article:title"]||i["weibo:webpage:title"]||i.title||i["twitter:title"],e.title||(e.title=this._getArticleTitle()),e.byline=t.byline||i["dc:creator"]||i["dcterm:creator"]||i.author,e.excerpt=t.excerpt||i["dc:description"]||i["dcterm:description"]||i["og:description"]||i["weibo:article:description"]||i["weibo:webpage:description"]||i.description||i["twitter:description"],e.siteName=t.siteName||i["og:site_name"],e.publishedTime=t.datePublished||i["article:published_time"]||null,e.title=this._unescapeHtmlEntities(e.title),e.byline=this._unescapeHtmlEntities(e.byline),e.excerpt=this._unescapeHtmlEntities(e.excerpt),e.siteName=this._unescapeHtmlEntities(e.siteName),e.publishedTime=this._unescapeHtmlEntities(e.publishedTime),e},_isSingleImage:function(t){return t.tagName==="IMG"?!0:t.children.length!==1||t.textContent.trim()!==""?!1:this._isSingleImage(t.children[0])},_unwrapNoscriptImages:function(t){var e=Array.from(t.getElementsByTagName("img"));this._forEachNode(e,function(r){for(var l=0;l<r.attributes.length;l++){var 
a=r.attributes[l];switch(a.name){case"src":case"srcset":case"data-src":case"data-srcset":return}if(/\\.(jpg|jpeg|png|webp)/i.test(a.value))return}r.parentNode.removeChild(r)});var i=Array.from(t.getElementsByTagName("noscript"));this._forEachNode(i,function(r){var l=t.createElement("div");if(l.innerHTML=r.innerHTML,!!this._isSingleImage(l)){var a=r.previousElementSibling;if(a&&this._isSingleImage(a)){var s=a;s.tagName!=="IMG"&&(s=a.getElementsByTagName("img")[0]);for(var h=l.getElementsByTagName("img")[0],c=0;c<s.attributes.length;c++){var n=s.attributes[c];if(n.value!==""&&(n.name==="src"||n.name==="srcset"||/\\.(jpg|jpeg|png|webp)/i.test(n.value))){if(h.getAttribute(n.name)===n.value)continue;var u=n.name;h.hasAttribute(u)&&(u="data-old-"+u),h.setAttribute(u,n.value)}}r.parentNode.replaceChild(l.firstElementChild,a)}}})},_removeScripts:function(t){this._removeNodes(this._getAllNodesWithTag(t,["script","noscript"]))},_hasSingleTagInsideElement:function(t,e){return t.children.length!=1||t.children[0].tagName!==e?!1:!this._someNode(t.childNodes,function(i){return i.nodeType===this.TEXT_NODE&&this.REGEXPS.hasContent.test(i.textContent)})},_isElementWithoutContent:function(t){return t.nodeType===this.ELEMENT_NODE&&t.textContent.trim().length==0&&(t.children.length==0||t.children.length==t.getElementsByTagName("br").length+t.getElementsByTagName("hr").length)},_hasChildBlockElement:function(t){return this._someNode(t.childNodes,function(e){return this.DIV_TO_P_ELEMS.has(e.tagName)||this._hasChildBlockElement(e)})},_isPhrasingContent:function(t){return t.nodeType===this.TEXT_NODE||this.PHRASING_ELEMS.indexOf(t.tagName)!==-1||(t.tagName==="A"||t.tagName==="DEL"||t.tagName==="INS")&&this._everyNode(t.childNodes,this._isPhrasingContent)},_isWhitespace:function(t){return t.nodeType===this.TEXT_NODE&&t.textContent.trim().length===0||t.nodeType===this.ELEMENT_NODE&&t.tagName==="BR"},_getInnerText:function(t,e){e=typeof e=="undefined"?!0:e;var i=t.textContent.trim();return 
e?i.replace(this.REGEXPS.normalize," "):i},_getCharCount:function(t,e){return e=e||",",this._getInnerText(t).split(e).length-1},_cleanStyles:function(t){if(!(!t||t.tagName.toLowerCase()==="svg")){for(var e=0;e<this.PRESENTATIONAL_ATTRIBUTES.length;e++)t.removeAttribute(this.PRESENTATIONAL_ATTRIBUTES[e]);this.DEPRECATED_SIZE_ATTRIBUTE_ELEMS.indexOf(t.tagName)!==-1&&(t.removeAttribute("width"),t.removeAttribute("height"));for(var i=t.firstElementChild;i!==null;)this._cleanStyles(i),i=i.nextElementSibling}},_getLinkDensity:function(t){var e=this._getInnerText(t).length;if(e===0)return 0;var i=0;return this._forEachNode(t.getElementsByTagName("a"),function(r){var l=r.getAttribute("href"),a=l&&this.REGEXPS.hashUrl.test(l)?.3:1;i+=this._getInnerText(r).length*a}),i/e},_getClassWeight:function(t){if(!this._flagIsActive(this.FLAG_WEIGHT_CLASSES))return 0;var e=0;return typeof t.className=="string"&&t.className!==""&&(this.REGEXPS.negative.test(t.className)&&(e-=25),this.REGEXPS.positive.test(t.className)&&(e+=25)),typeof t.id=="string"&&t.id!==""&&(this.REGEXPS.negative.test(t.id)&&(e-=25),this.REGEXPS.positive.test(t.id)&&(e+=25)),e},_clean:function(t,e){var i=["object","embed","iframe"].indexOf(e)!==-1;this._removeNodes(this._getAllNodesWithTag(t,[e]),function(r){if(i){for(var l=0;l<r.attributes.length;l++)if(this._allowedVideoRegex.test(r.attributes[l].value))return!1;if(r.tagName==="object"&&this._allowedVideoRegex.test(r.innerHTML))return!1}return!0})},_hasAncestorTag:function(t,e,i,r){i=i||3,e=e.toUpperCase();for(var l=0;t.parentNode;){if(i>0&&l>i)return!1;if(t.parentNode.tagName===e&&(!r||r(t.parentNode)))return!0;t=t.parentNode,l++}return!1},_getRowAndColumnCount:function(t){for(var e=0,i=0,r=t.getElementsByTagName("tr"),l=0;l<r.length;l++){var a=r[l].getAttribute("rowspan")||0;a&&(a=parseInt(a,10)),e+=a||1;for(var s=0,h=r[l].getElementsByTagName("td"),c=0;c<h.length;c++){var 
n=h[c].getAttribute("colspan")||0;n&&(n=parseInt(n,10)),s+=n||1}i=Math.max(i,s)}return{rows:e,columns:i}},_markDataTables:function(t){for(var e=t.getElementsByTagName("table"),i=0;i<e.length;i++){var r=e[i],l=r.getAttribute("role");if(l=="presentation"){r._readabilityDataTable=!1;continue}var a=r.getAttribute("datatable");if(a=="0"){r._readabilityDataTable=!1;continue}var s=r.getAttribute("summary");if(s){r._readabilityDataTable=!0;continue}var h=r.getElementsByTagName("caption")[0];if(h&&h.childNodes.length>0){r._readabilityDataTable=!0;continue}var c=["col","colgroup","tfoot","thead","th"],n=function(m){return!!r.getElementsByTagName(m)[0]};if(c.some(n)){this.log("Data table because found data-y descendant"),r._readabilityDataTable=!0;continue}if(r.getElementsByTagName("table")[0]){r._readabilityDataTable=!1;continue}var u=this._getRowAndColumnCount(r);if(u.rows>=10||u.columns>4){r._readabilityDataTable=!0;continue}r._readabilityDataTable=u.rows*u.columns>10}},_fixLazyImages:function(t){this._forEachNode(this._getAllNodesWithTag(t,["img","picture","figure"]),function(e){if(e.src&&this.REGEXPS.b64DataUrl.test(e.src)){var i=this.REGEXPS.b64DataUrl.exec(e.src);if(i[1]==="image/svg+xml")return;for(var r=!1,l=0;l<e.attributes.length;l++){var a=e.attributes[l];if(a.name!=="src"&&/\\.(jpg|jpeg|png|webp)/i.test(a.value)){r=!0;break}}if(r){var s=e.src.search(/base64\\s*/i)+7,h=e.src.length-s;h<133&&e.removeAttribute("src")}}if(!((e.src||e.srcset&&e.srcset!="null")&&e.className.toLowerCase().indexOf("lazy")===-1)){for(var c=0;c<e.attributes.length;c++)if(a=e.attributes[c],!(a.name==="src"||a.name==="srcset"||a.name==="alt")){var n=null;if(/\\.(jpg|jpeg|png|webp)\\s+\\d/.test(a.value)?n="srcset":/^\\s*\\S+\\.(jpg|jpeg|png|webp)\\S*\\s*$/.test(a.value)&&(n="src"),n){if(e.tagName==="IMG"||e.tagName==="PICTURE")e.setAttribute(n,a.value);else if(e.tagName==="FIGURE"&&!this._getAllNodesWithTag(e,["img","picture"]).length){var 
u=this._doc.createElement("img");u.setAttribute(n,a.value),e.appendChild(u)}}}}})},_getTextDensity:function(t,e){var i=this._getInnerText(t,!0).length;if(i===0)return 0;var r=0,l=this._getAllNodesWithTag(t,e);return this._forEachNode(l,a=>r+=this._getInnerText(a,!0).length),r/i},_cleanConditionally:function(t,e){this._flagIsActive(this.FLAG_CLEAN_CONDITIONALLY)&&this._removeNodes(this._getAllNodesWithTag(t,[e]),function(i){var r=function(g){return g._readabilityDataTable},l=e==="ul"||e==="ol";if(!l){var a=0,s=this._getAllNodesWithTag(i,["ul","ol"]);this._forEachNode(s,g=>a+=this._getInnerText(g).length),l=a/this._getInnerText(i).length>.9}if(e==="table"&&r(i)||this._hasAncestorTag(i,"table",-1,r)||this._hasAncestorTag(i,"code"))return!1;var h=this._getClassWeight(i);this.log("Cleaning Conditionally",i);var c=0;if(h+c<0)return!0;if(this._getCharCount(i,",")<10){for(var n=i.getElementsByTagName("p").length,u=i.getElementsByTagName("img").length,m=i.getElementsByTagName("li").length-100,b=i.getElementsByTagName("input").length,N=this._getTextDensity(i,["h1","h2","h3","h4","h5","h6"]),v=0,y=this._getAllNodesWithTag(i,["object","embed","iframe"]),E=0;E<y.length;E++){for(var d=0;d<y[E].attributes.length;d++)if(this._allowedVideoRegex.test(y[E].attributes[d].value))return!1;if(y[E].tagName==="object"&&this._allowedVideoRegex.test(y[E].innerHTML))return!1;v++}var p=this._getLinkDensity(i),x=this._getInnerText(i).length,o=u>1&&n/u<.5&&!this._hasAncestorTag(i,"figure")||!l&&m>n||b>Math.floor(n/3)||!l&&N<.9&&x<25&&(u===0||u>2)&&!this._hasAncestorTag(i,"figure")||!l&&h<25&&p>.2||h>=25&&p>.5||v===1&&x<75||v>1;if(l&&o){for(var L=0;L<i.children.length;L++)if(i.children[L].children.length>1)return o;let g=i.getElementsByTagName("li").length;if(u==g)return!1}return o}return!1})},_cleanMatchedNodes:function(t,e){for(var i=this._getNextNode(t,!0),r=this._getNextNode(t);r&&r!=i;)e.call(this,r,r.className+" 
"+r.id)?r=this._removeAndGetNext(r):r=this._getNextNode(r)},_cleanHeaders:function(t){let e=this._getAllNodesWithTag(t,["h1","h2"]);this._removeNodes(e,function(i){let r=this._getClassWeight(i)<0;return r&&this.log("Removing header with low class weight:",i),r})},_headerDuplicatesTitle:function(t){if(t.tagName!="H1"&&t.tagName!="H2")return!1;var e=this._getInnerText(t,!1);return this.log("Evaluating similarity of header:",e,this._articleTitle),this._textSimilarity(this._articleTitle,e)>.75},_flagIsActive:function(t){return(this._flags&t)>0},_removeFlag:function(t){this._flags=this._flags&~t},_isProbablyVisible:function(t){return(!t.style||t.style.display!="none")&&(!t.style||t.style.visibility!="hidden")&&!t.hasAttribute("hidden")&&(!t.hasAttribute("aria-hidden")||t.getAttribute("aria-hidden")!="true"||t.className&&t.className.indexOf&&t.className.indexOf("fallback-image")!==-1)},parse:function(){if(this._maxElemsToParse>0){var t=this._doc.getElementsByTagName("*").length;if(t>this._maxElemsToParse)throw new Error("Aborting parsing document; "+t+" elements found")}this._unwrapNoscriptImages(this._doc);var e=this._disableJSONLD?{}:this._getJSONLD(this._doc);this._removeScripts(this._doc),this._prepDocument();var i=this._getArticleMetadata(e);this._articleTitle=i.title;var r=this._grabArticle();if(!r)return null;if(this.log("Grabbed: "+r.innerHTML),this._postProcessContent(r),!i.excerpt){var l=r.getElementsByTagName("p");l.length>0&&(i.excerpt=l[0].textContent.trim())}var a=r.textContent;return{title:this._articleTitle,byline:i.byline||this._articleByline,dir:this._articleDir,lang:this._articleLang,content:this._serializer(r),textContent:a,length:a.length,excerpt:i.excerpt,siteName:i.siteName||this._articleSiteName,publishedTime:i.publishedTime}}};typeof module=="object"&&(module.exports=q);\n';
648
+
649
+ // src/libs/browser-search/search.ts
650
+ var import_logger4 = require("@agent-infra/logger");
651
+
652
+ // src/libs/browser-search/utils.ts
653
+ var import_turndown = __toESM(require("turndown"), 1);
654
+ var import_turndown_plugin_gfm = require("turndown-plugin-gfm");
655
+ var import_logger3 = require("@agent-infra/logger");
656
+ var import_user_agents = __toESM(require("user-agents"), 1);
657
/**
 * Parses a string into a URL object without throwing.
 *
 * @param {string} url2 - Candidate absolute URL.
 * @returns {URL | null} The parsed URL, or `null` when `url2` cannot be parsed.
 */
var parseUrl = (url2) => {
  try {
    return new URL(url2);
  } catch {
    return null;
  }
};

// Base domains rejected by shouldSkipDomain. Hostnames are compared after an
// optional leading "www." is stripped, so every entry also covers its www form.
var SKIPPED_BASE_DOMAINS = new Set([
  "reddit.com",
  "x.com",
  "twitter.com",
  "youtube.com"
]);

/**
 * Reports whether a URL should be skipped: either it cannot be parsed, or its
 * host is one of the skipped domains (with or without a "www." prefix).
 *
 * The previous hard-coded list had a "www." variant for every domain except
 * x.com; normalizing the hostname removes that inconsistency.
 *
 * @param {string} url2 - URL to check.
 * @returns {boolean} `true` when the URL is unparseable or on a skipped domain.
 */
var shouldSkipDomain = (url2) => {
  const parsed = parseUrl(url2);
  if (!parsed) return true;
  const baseHost = parsed.hostname.replace(/^www\./, "");
  return SKIPPED_BASE_DOMAINS.has(baseHost);
};
678
/**
 * Makes an automated page look more like a regular desktop browser: bypasses
 * CSP, sets a random desktop user agent, and overrides a handful of
 * `navigator` properties that reveal automation.
 *
 * The overrides are registered with `evaluateOnNewDocument` so they are
 * re-applied to every document the page loads, and are also applied to the
 * current document right away (the only thing the previous version did, which
 * meant the overrides disappeared on the first navigation).
 *
 * @param page - The Puppeteer page object to patch.
 * @returns Promise that resolves once all patches have been installed.
 */
async function applyStealthScripts(page) {
  const userAgent = new import_user_agents.default({
    deviceCategory: "desktop"
  }).toString();
  // Handed to page.evaluate / evaluateOnNewDocument, so it is written to be
  // self-contained: it references only page globals, never Node-side variables.
  const patchNavigator = () => {
    const nav = window.navigator;
    // configurable: true lets the patch be applied more than once to the same
    // document without "Cannot redefine property" errors.
    Object.defineProperty(nav, "webdriver", {
      configurable: true,
      get: () => void 0
    });
    Object.defineProperty(nav, "languages", {
      configurable: true,
      get: () => ["en-US", "en"]
    });
    Object.defineProperty(nav, "plugins", {
      configurable: true,
      get: () => [{}, {}, {}, {}, {}]
    });
    Object.defineProperty(nav, "headless", {
      configurable: true,
      get: () => false
    });
    const permissions = nav.permissions;
    if (permissions && typeof permissions.query === "function") {
      // Bind the native method: calling it detached from `permissions`
      // throws "Illegal invocation" for every non-notification query.
      const originalQuery = permissions.query.bind(permissions);
      permissions.query = (parameters) => parameters.name === "notifications" ? Promise.resolve({
        state: Notification.permission
      }) : originalQuery(parameters);
    }
  };
  await page.setBypassCSP(true);
  await page.setUserAgent(userAgent);
  await page.evaluateOnNewDocument(patchNavigator);
  await page.evaluate(patchNavigator);
}
703
/**
 * Applies the stealth patches to a page and enables request interception so
 * that only document navigation requests are allowed through; every other
 * request (images, scripts, styles, non-navigation documents, ...) is aborted.
 *
 * @param page - The Puppeteer page object.
 * @returns Promise that resolves once interception has been set up.
 */
async function interceptRequest(page) {
  await applyStealthScripts(page);
  await page.setRequestInterception(true);
  page.on("request", async (request) => {
    // The event emitter ignores the value returned by a listener, so a
    // rejection from continue()/abort() would surface as an unhandled
    // rejection. Await the result here and log failures instead.
    try {
      const isNavigation = request.resourceType() === "document" && request.isNavigationRequest();
      if (isNavigation) {
        await request.continue();
      } else {
        await request.abort();
      }
    } catch (error) {
      import_logger3.defaultLogger.error("Failed to resolve intercepted request:", error);
    }
  });
}
717
/**
 * Extracts the readable article content and title from a loaded page.
 *
 * The Readability implementation arrives as source text and is turned into a
 * constructor by evaluating it against a CommonJS-style `module` shim.
 * NOTE(review): `new Function` executes `readabilityScript` verbatim; it must
 * only ever receive the bundled Readability source, never external input.
 *
 * @param window2 - Window object whose `document` is processed. Non-content
 *   elements are removed from it in place before parsing.
 * @param readabilityScript - Source text that assigns the Readability class
 *   to `module.exports`.
 * @returns Object with the article HTML in `content` ("" when nothing was
 *   extracted) and `title` (article title, falling back to `document.title`).
 */
function extractPageInformation(window2, readabilityScript) {
  const moduleShim = {};
  const loadReadability = new Function(
    "module",
    `${readabilityScript}
return module.exports`
  );
  const Readability = loadReadability(moduleShim);
  const doc = window2.document;
  // Strip scripts, styling, media and reference lists before extraction.
  const noiseNodes = doc.querySelectorAll(
    "script,noscript,style,link,svg,img,video,iframe,canvas,.reflist"
  );
  noiseNodes.forEach((node) => {
    node.remove();
  });
  const parsed = new Readability(doc).parse();
  const articleHtml = parsed?.content || "";
  const fallbackTitle = doc.title;
  return {
    content: articleHtml,
    title: parsed?.title || fallbackTitle
  };
}
735
/**
 * Converts an HTML string to Markdown with Turndown.
 *
 * @param html - HTML to convert; a falsy value yields "".
 * @param options - Conversion options.
 * @param options.codeBlockStyle - Turndown code block style (default: "fenced")
 * @param options.headingStyle - Turndown heading style (default: "atx")
 * @param options.emDelimiter - Emphasis delimiter (default: "*")
 * @param options.strongDelimiter - Strong delimiter (default: "**")
 * @param options.gfmExtension - Whether to enable the GFM plugin (default: true)
 * @returns The Markdown text, or the original `html` unchanged when the
 *   conversion throws (the error is logged).
 */
function toMarkdown(html, options = {}) {
  if (!html) {
    return "";
  }
  let markdown;
  try {
    // Read the options inside the try so an invalid `options` value takes the
    // same logged fallback path as a conversion failure.
    const {
      gfmExtension = true,
      codeBlockStyle = "fenced",
      headingStyle = "atx",
      emDelimiter = "*",
      strongDelimiter = "**"
    } = options;
    const converter = new import_turndown.default({
      codeBlockStyle,
      headingStyle,
      emDelimiter,
      strongDelimiter
    });
    if (gfmExtension) {
      converter.use(import_turndown_plugin_gfm.gfm);
    }
    markdown = converter.turndown(html);
  } catch (error) {
    import_logger3.defaultLogger.error("Error converting HTML to Markdown:", error);
    markdown = html;
  }
  return markdown;
}
760
+
761
+ // src/libs/browser-search/queue.ts
762
/**
 * Runs asynchronous tasks with a bounded number executing at the same time.
 *
 * Fulfilled task results are appended to `results` in completion order;
 * rejected tasks are reported only through the promise returned by `add`.
 */
var PromiseQueue = class {
  // Tasks that have been added but not started yet.
  queue = [];
  // Maximum number of tasks allowed to run at once.
  concurrency;
  // Number of tasks currently executing.
  running = 0;
  // Values of every fulfilled task, in completion order.
  results = [];
  // Resolvers of pending waitAll() calls, released when the queue goes idle.
  #idleWaiters = [];
  /**
   * @param concurrency - Maximum number of concurrently running tasks (default: 1)
   */
  constructor(concurrency = 1) {
    this.concurrency = concurrency;
  }
  /**
   * Enqueues a task and starts it as soon as a slot is free.
   *
   * @param task - Function returning a value or a promise.
   * @returns Promise that settles with the task's own outcome.
   */
  add(task) {
    return new Promise((resolve, reject) => {
      this.queue.push(async () => {
        try {
          const result = await task();
          resolve(result);
          return result;
        } catch (error) {
          reject(error);
          throw error;
        }
      });
      this.run();
    });
  }
  /**
   * Starts the next queued task when a concurrency slot is available. Called
   * after every `add` and again each time a task finishes.
   */
  async run() {
    if (this.running >= this.concurrency || this.queue.length === 0) {
      return;
    }
    this.running++;
    const task = this.queue.shift();
    try {
      const result = await task();
      this.results.push(result);
    } catch {
      // Intentionally ignored: the wrapper built in add() has already rejected
      // the caller's promise, and one failed task must not stop the queue.
    } finally {
      this.running--;
      // run() starts the next task synchronously (up to its first await), so
      // `running` is up to date when the idle check below executes.
      void this.run();
      if (this.running === 0 && this.queue.length === 0) {
        const waiters = this.#idleWaiters;
        this.#idleWaiters = [];
        waiters.forEach((wake) => wake());
      }
    }
  }
  /**
   * Waits until no task is running or queued.
   *
   * Resolves as soon as the queue drains instead of polling on a 100 ms timer.
   *
   * @returns The `results` array (fulfilled values in completion order).
   */
  async waitAll() {
    // Re-check after every wake-up: tasks may have been added in the meantime.
    while (this.running > 0 || this.queue.length > 0) {
      await new Promise((resolve) => this.#idleWaiters.push(resolve));
    }
    return this.results;
  }
};
807
+
808
+ // src/libs/browser-search/engines/bing.ts
809
var BingSearchEngine = class {
  /**
   * Generates a Bing search URL based on the provided query and options.
   *
   * @param query - The search query string
   * @param options - Search configuration options (optional)
   * @param options.count - Number of search results to request (default: 10)
   * @param options.excludeDomains - Array of domain names to exclude from search results
   * @returns Formatted Bing search URL as a string
   */
  getSearchUrl(query, options = {}) {
    const { count, excludeDomains } = options;
    // Each excluded domain becomes a "-site:<domain>" term placed before the query.
    const exclusionPrefix = excludeDomains && excludeDomains.length > 0 ? `${excludeDomains.map((domain) => `-site:${domain}`).join(" ")} ` : "";
    const searchParams = new URLSearchParams({
      q: `${exclusionPrefix}${query}`,
      count: `${count || 10}`
    });
    return `https://www.bing.com/search?${searchParams.toString()}`;
  }
  /**
   * Extracts search results from a Bing search page.
   *
   * NOTE(review): presumably this method is serialized and evaluated inside
   * the browser page (TODO confirm against the caller), so every helper is
   * kept local and nothing from the surrounding module is referenced.
   *
   * @param window - The browser window object containing the loaded Bing search page
   * @returns Array of `{ title, snippet, url, content }` results; entries
   *   without a valid absolute URL or without a non-blank title are dropped
   * @throws Re-throws any error raised while reading the DOM (after logging it)
   */
  extractSearchResults(window2) {
    const links = [];
    const document2 = window2.document;
    const isValidUrl = (url2) => {
      try {
        new URL(url2);
        return true;
      } catch (error) {
        return false;
      }
    };
    // Builds the snippet from a clone so the live DOM is left untouched:
    // title, attribution, script and style nodes are removed, then the text of
    // every remaining descendant is collected, skipping any text that contains
    // or is contained in a piece already collected.
    const extractSnippet = (element) => {
      const clone = element.cloneNode(true);
      clone.querySelectorAll("h2").forEach((el) => el.remove());
      clone.querySelectorAll(".b_attribution").forEach((el) => el.remove());
      clone.querySelectorAll("script, style").forEach((el) => el.remove());
      const pieces = [];
      Array.from(clone.querySelectorAll("*")).forEach((node) => {
        const text = node.textContent?.trim();
        if (!text) return;
        const overlaps = pieces.some(
          (seen) => seen.includes(text) || text.includes(seen)
        );
        if (!overlaps) {
          pieces.push(text);
        }
      });
      return pieces.join(" ").trim().replace(/\s+/g, " ");
    };
    try {
      const elements = document2.querySelectorAll(".b_algo");
      elements.forEach((element) => {
        const url2 = element.querySelector("h2 a")?.getAttribute("href");
        // Validate first so no snippet work is done for results that are dropped.
        if (!url2 || !isValidUrl(url2)) return;
        // Trim so a whitespace-only heading counts as a missing title.
        const title = element.querySelector("h2")?.textContent?.trim() || "";
        if (!title) return;
        links.push({
          title,
          snippet: extractSnippet(element),
          url: url2,
          content: ""
        });
      });
    } catch (error) {
      console.error("Error extracting search results from Bing:", error);
      throw error;
    }
    return links;
  }
  /**
   * Waits for Bing search results to load completely.
   *
   * @param page - The Puppeteer page object
   * @param timeout - Maximum time to wait in milliseconds (default: 10000)
   * @returns Promise that resolves when search results are loaded
   */
  async waitForSearchResults(page, timeout) {
    await page.waitForSelector("#b_results", {
      timeout: timeout ?? 1e4
    });
  }
};
896
+
897
+ // src/libs/browser-search/engines/baidu.ts
898
var BaiduSearchEngine = class {
  /**
   * Generates a Baidu search URL based on the provided query and options.
   *
   * @param query - The search query string
   * @param options - Search configuration options (optional)
   * @param options.count - Number of search results to request (default: 10)
   * @param options.excludeDomains - Array of domain names to exclude from search results
   * @returns Formatted Baidu search URL as a string
   */
  getSearchUrl(query, options = {}) {
    const { count, excludeDomains } = options;
    const excludeDomainsQuery = excludeDomains && excludeDomains.length > 0 ? excludeDomains.map((domain) => `-site:${domain}`).join(" ") : "";
    const searchParams = new URLSearchParams({
      wd: excludeDomainsQuery ? `${excludeDomainsQuery} ${query}` : query,
      // rn is the parameter for result count
      rn: `${count || 10}`
    });
    return `https://www.baidu.com/s?${searchParams.toString()}`;
  }
  /**
   * Extracts search results from a Baidu search page.
   *
   * Extraction is best-effort: an error raised while reading the DOM is
   * logged and whatever was collected up to that point is returned.
   *
   * @param window - The browser window object containing the loaded Baidu search page
   * @returns Array of `{ title, url, snippet, content }` results; entries
   *   without a link or without a non-blank title are dropped
   */
  extractSearchResults(window2) {
    const links = [];
    const document2 = window2.document;
    try {
      const elements = document2.querySelectorAll(".result");
      elements.forEach((element) => {
        const titleEl = element.querySelector(".t a");
        const url2 = titleEl?.getAttribute("href");
        if (!url2) return;
        // Trim so a whitespace-only title counts as missing.
        const title = titleEl?.textContent?.trim() || "";
        if (!title) return;
        const snippetEl = element.querySelector(".c-span-last .content-right_2s-H4");
        links.push({
          title,
          url: url2,
          // Note: Baidu uses redirects, we'll need to follow them
          snippet: snippetEl?.textContent?.trim() || "",
          content: ""
        });
      });
    } catch (error) {
      console.error("Error extracting search results from Baidu:", error);
    }
    return links;
  }
  /**
   * Waits for Baidu search results to load completely.
   *
   * @param page - The Puppeteer page object
   * @param timeout - Maximum time to wait in milliseconds (default: 10000)
   * @returns Promise that resolves when search results are loaded
   */
  async waitForSearchResults(page, timeout) {
    await page.waitForSelector("#page", {
      timeout: timeout ?? 1e4
    });
  }
};
960
+
961
+ // src/libs/browser-search/engines/sogou.ts
962
var SogouSearchEngine = class {
  /**
   * Generates a Sogou search URL based on the provided query and options.
   *
   * @param query - The search query string
   * @param options - Search configuration options (optional)
   * @param options.count - Number of search results to request (default: 10)
   * @param options.excludeDomains - Array of domain names to exclude from search results
   * @returns Formatted Sogou search URL as a string
   */
  getSearchUrl(query, options = {}) {
    const { count = 10, excludeDomains = [] } = options;
    const excludeDomainsQuery = excludeDomains && excludeDomains.length > 0 ? excludeDomains.map((domain) => `-site:${domain}`).join(" ") : "";
    const searchParams = new URLSearchParams({
      query: `${excludeDomainsQuery ? `${excludeDomainsQuery} ` : ""}${query}`,
      num: `${count}`
    });
    return `https://www.sogou.com/web?${searchParams.toString()}`;
  }
  /**
   * !NOTE: This function runs in the context of the browser page, not Node.js,
   * so it must stay self-contained (no references to module-level names).
   *
   * Extract search results from Sogou. Nodes matching the excluded snippet
   * selectors are removed from each processed result element in place.
   *
   * @param window - The window object
   * @returns Array of `{ title, url, snippet, content }` results; entries
   *   without a link, with an unresolvable link, or without a non-blank title
   *   are dropped
   * @throws Re-throws any error raised while reading the DOM (after logging it)
   */
  extractSearchResults(window2) {
    const links = [];
    const document2 = window2.document;
    const EndPoints = "https://www.sogou.com";
    const SELECTOR = {
      results: ".results .vrwrap",
      resultTitle: ".vr-title",
      resultLink: ".vr-title > a",
      resultSnippet: [".star-wiki", ".fz-mid", ".attribute-centent"],
      resultSnippetExcluded: [".text-lightgray", ".zan-box", ".tag-website"]
    };
    // Returns an absolute URL for a result link, or null when there is none.
    // Absolute http(s) links are returned as written; anything else is
    // resolved against the Sogou origin (result links are often relative
    // redirect paths). A missing href no longer becomes the bogus but valid
    // URL "https://www.sogou.comundefined".
    const resolveUrl = (href) => {
      const trimmed = href?.trim();
      if (!trimmed) return null;
      try {
        if (/^https?:\/\//i.test(trimmed)) {
          new URL(trimmed);
          return trimmed;
        }
        return new URL(trimmed, EndPoints).href;
      } catch {
        return null;
      }
    };
    try {
      const elements = document2.querySelectorAll(SELECTOR.results);
      elements.forEach((element) => {
        const titleEl = element.querySelector(SELECTOR.resultTitle);
        const url2 = resolveUrl(
          element.querySelector(SELECTOR.resultLink)?.getAttribute("href")
        );
        if (!url2) return;
        // Drop every excluded node once, before any text is read.
        element.querySelectorAll(SELECTOR.resultSnippetExcluded.join(",")).forEach((el) => el.remove());
        const snippets = SELECTOR.resultSnippet.map(
          (selector) => element.querySelector(selector)?.textContent?.trim() || ""
        );
        const title = titleEl?.textContent?.trim() || "";
        if (!title) return;
        links.push({
          title,
          url: url2,
          snippet: snippets.join(""),
          content: ""
        });
      });
    } catch (error) {
      const msg = error instanceof Error ? error.message : String(error);
      console.error("Error extracting search results from Sogou:", msg);
      throw error;
    }
    return links;
  }
  /**
   * Waits for Sogou search results to load completely.
   *
   * @param page - The Puppeteer page object
   * @param timeout - Maximum time to wait in milliseconds (default: 10000)
   * @returns Promise that resolves when search results are loaded
   */
  async waitForSearchResults(page, timeout) {
    await page.waitForSelector("#pagebar_container", {
      timeout: timeout ?? 1e4
    });
  }
};
1051
+
1052
+ // src/libs/browser-search/engines/google.ts
1053
// Google adapter for the browser-based search: builds the results-page URL,
// scrapes result entries out of the loaded DOM, and waits for the page to be ready.
var GoogleSearchEngine = class {
  /**
   * Generates a Google search URL based on the provided query and options.
   *
   * @param query - The search query string
   * @param options - Search configuration options (may be omitted)
   * @param options.count - Number of search results to request (default: 10)
   * @param options.excludeDomains - Array of domain names to exclude from search results
   * @returns Formatted Google search URL as a string
   */
  getSearchUrl(query, options) {
    // Tolerate a missing options object instead of throwing on property access.
    const { count, excludeDomains } = options ?? {};
    const exclusions = excludeDomains?.length > 0 ? `${excludeDomains.map((domain) => `-site:${domain}`).join(" ")} ` : "";
    const searchParams = new URLSearchParams({
      q: `${exclusions}${query}`,
      num: `${count || 10}`,
      // NOTE(review): udm=14 presumably selects Google's plain "Web" results view — confirm.
      udm: "14"
    });
    return `https://www.google.com/search?${searchParams.toString()}`;
  }
  /**
   * Extracts search results from a Google search page.
   *
   * This method is handed to the browser as a page function, so it must stay
   * self-contained: every helper it needs is defined inside its own body and
   * it never touches `this` or module-level names.
   *
   * @param window - The browser window object containing the loaded Google search page
   * @returns Array of `{ title, url, snippet, content }` items; entries without an
   *          absolute URL or without a title are skipped
   */
  extractSearchResults(window2) {
    const links = [];
    const document2 = window2.document;
    const isValidUrl = (url2) => {
      try {
        new URL(url2);
        return true;
      } catch {
        return false;
      }
    };
    const extractSnippet = (element) => {
      // Work on a copy so the live page is never modified.
      const clone = element.cloneNode(true);
      // Titles, cite lines and script/style bodies are not snippet text.
      clone.querySelectorAll("h3, cite, script, style").forEach((el) => el.remove());
      const fragments = Array.from(clone.querySelectorAll("*")).map((node) => node.textContent?.trim()).filter(Boolean);
      // Nested elements repeat their descendants' text; keep a fragment only when it
      // neither contains nor is contained in one that was already kept.
      const unique = fragments.reduce((acc, curr) => {
        if (!acc.some((kept) => kept.includes(curr) || curr.includes(kept))) {
          acc.push(curr);
        }
        return acc;
      }, []);
      return unique.join(" ").trim().replace(/\s+/g, " ");
    };
    try {
      document2.querySelectorAll(".tF2Cxc").forEach((element) => {
        // Validate the cheap fields first so unusable entries never pay for a DOM clone.
        const url2 = element.querySelector("a")?.getAttribute("href");
        if (!url2 || !isValidUrl(url2)) return;
        const title = element.querySelector("h3")?.textContent?.trim() || "";
        if (!title) return;
        links.push({
          title,
          url: url2,
          snippet: extractSnippet(element.parentElement || element),
          content: ""
        });
      });
    } catch (error) {
      // Best effort: report the failure and return whatever was collected so far.
      console.error(error);
    }
    return links;
  }
  /**
   * Waits for Google search results to load completely.
   *
   * @param page - The Puppeteer page object
   * @param timeout - Maximum wait in milliseconds (default: 10000)
   * @returns Promise that resolves when search results are loaded
   */
  async waitForSearchResults(page, timeout) {
    await page.waitForSelector("#search", {
      timeout: timeout ?? 1e4
    });
  }
};
1140
+
1141
+ // src/libs/browser-search/engines/get.ts
1142
/**
 * Returns a fresh engine adapter for the given engine name.
 *
 * @param engine - "bing", "baidu", "sogou" or "google"
 * @returns A new engine instance; any other value yields the Bing adapter
 */
function getSearchEngine(engine) {
  if (engine === "baidu") {
    return new BaiduSearchEngine();
  }
  if (engine === "sogou") {
    return new SogouSearchEngine();
  }
  if (engine === "google") {
    return new GoogleSearchEngine();
  }
  // "bing" itself and every unrecognised name share the Bing adapter.
  return new BingSearchEngine();
}
1156
+
1157
+ // src/libs/browser-search/search.ts
1158
// Drives a browser to run search-engine queries and, optionally, to open each
// result page and convert its extracted content to markdown.
var BrowserSearch = class {
  /**
   * @param config - Optional settings: `logger`, `browser`, `defaultEngine`
   *                 (falls back to "bing") and `browserOptions` (passed to `browser.launch`)
   */
  constructor(config = {}) {
    this.config = config;
    this.logger = config?.logger ?? import_logger4.defaultLogger;
    this.browser = config.browser ?? new LocalBrowser({ logger: this.logger });
    this.defaultEngine = config.defaultEngine ?? "bing";
  }
  logger;
  browser;
  isBrowserOpen = false;
  defaultEngine;
  /**
   * Search web and extract content from result pages.
   *
   * Accepts a single query or an array of queries; all queries run concurrently
   * and share one visited-URL set, so a URL is reported at most once.
   * Any failure is logged and turned into an empty result list.
   *
   * @param options - query, count, engine, excludeDomains, concurrency, truncate,
   *                  needVisitedUrls, keepBrowserOpen
   * @returns Flattened list of results for all queries
   */
  async perform(options) {
    this.logger.info("Starting search with options:", options);
    let queries = options.query;
    if (!Array.isArray(queries)) {
      queries = [queries];
    }
    const engine = options.engine || this.defaultEngine;
    const excludeDomains = options.excludeDomains || [];
    // Split the requested count across the queries, but never ask for fewer than 3 each.
    let count = options.count;
    if (count) {
      count = Math.max(3, Math.floor(count / queries.length));
    }
    try {
      if (this.isBrowserOpen) {
        this.logger.info("Using existing browser instance");
      } else {
        this.logger.info("Launching browser");
        await this.browser.launch(this.config.browserOptions);
        this.isBrowserOpen = true;
      }
      // State shared by every query of this call.
      const shared = {
        count,
        queue: new PromiseQueue(options.concurrency || 15),
        visitedUrls: /* @__PURE__ */ new Set(),
        excludeDomains,
        truncate: options.truncate,
        needVisitedUrls: options.needVisitedUrls,
        engine
      };
      const perQuery = await Promise.all(
        queries.map((query) => this.search(this.browser, { query, ...shared }))
      );
      this.logger.success("Search completed successfully");
      return perQuery.flat();
    } catch (error) {
      this.logger.error("Search failed:", error);
      return [];
    } finally {
      const shouldClose = !options.keepBrowserOpen && this.isBrowserOpen;
      if (shouldClose) {
        await this.closeBrowser();
      }
    }
  }
  /**
   * Explicitly close the browser instance (no-op when it is not open).
   */
  async closeBrowser() {
    if (!this.isBrowserOpen) return;
    this.logger.info("Closing browser");
    await this.browser.close();
    this.isBrowserOpen = false;
  }
  /**
   * Runs one query against one engine and returns its de-duplicated results.
   *
   * @param browser - Browser used to load the results page
   * @param options - Per-query options assembled by `perform`
   * @returns Result items, with `content` cut to `options.truncate` characters when set
   */
  async search(browser, options) {
    const searchEngine = getSearchEngine(options.engine);
    const { query, count, excludeDomains } = options;
    const url2 = searchEngine.getSearchUrl(query, { count, excludeDomains });
    this.logger.info(`Searching with ${options.engine} engine: ${url2}`);
    const fetched = await browser.evaluateOnNewPage({
      url: url2,
      waitForOptions: {
        waitUntil: "networkidle0"
      },
      pageFunction: searchEngine.extractSearchResults,
      pageFunctionParams: [],
      beforePageLoad: async (page) => {
        await interceptRequest(page);
      },
      afterPageLoad: async (page) => {
        if (searchEngine.waitForSearchResults) {
          await searchEngine.waitForSearchResults(page, 1e4);
        }
      }
    });
    this.logger.info(`Fetched ${fetched?.length ?? 0} links`);
    // A URL is marked as visited even when its domain is then skipped.
    const isNewAndAllowed = (link) => {
      if (options.visitedUrls.has(link.url)) return false;
      options.visitedUrls.add(link.url);
      return !shouldSkipDomain(link.url);
    };
    const links = fetched?.filter(isNewAndAllowed) || [];
    if (links.length === 0) {
      this.logger.info("No valid links found");
      return [];
    }
    // Either visit every link through the shared queue, or pass the raw items through.
    const pending = options.needVisitedUrls ? links.map(
      (item) => options.queue.add(() => this.visitLink(this.browser, item))
    ) : links;
    const settled = await Promise.allSettled(pending);
    const output = [];
    for (const outcome of settled) {
      if (outcome.status === "rejected" || !outcome.value) continue;
      const entry = outcome.value;
      output.push({
        ...entry,
        content: options.truncate ? entry.content.slice(0, options.truncate) : entry.content
      });
    }
    return output;
  }
  /**
   * Opens a result page, extracts its information and converts the content to markdown.
   *
   * @param browser - Browser used to load the page
   * @param item - Search result item (`url` and `snippet` are carried over)
   * @returns The extracted page data, or undefined when nothing was extracted or the visit failed
   */
  async visitLink(browser, item) {
    try {
      this.logger.info("Visiting link:", item.url);
      const result = await browser.evaluateOnNewPage({
        url: item.url,
        pageFunction: extractPageInformation,
        pageFunctionParams: [READABILITY_SCRIPT],
        beforePageLoad: async (page) => {
          await interceptRequest(page);
        }
      });
      if (!result) return void 0;
      return {
        ...result,
        url: item.url,
        content: toMarkdown(result.content),
        snippet: item.snippet
      };
    } catch (e) {
      this.logger.error("Failed to visit link:", e);
    }
  }
};
1288
+
1289
+ // src/search/local.ts
1290
+ var import_logger5 = require("@agent-infra/logger");
1291
+ var logger2 = new import_logger5.ConsoleLogger("[LocalSearch]");
1292
/**
 * Runs a browser-based search, trying each requested engine in order and
 * stopping at the first one that returns any results.
 *
 * @param options - Search options
 * @param options.query - Query forwarded to `BrowserSearch.perform`
 * @param options.limit - Result count requested from each engine (default: 10)
 * @param options.engines - Comma-separated engine names, or "all" (default) for
 *                          "bing,google,baidu,sogou"; blanks around names are ignored
 * @returns `{ results, success: true }` — `results` is empty when no engine returned anything
 * @throws Error "engines is required" when the list contains no engine name
 */
async function localSearch(options) {
  const { query, limit = 10, engines = "all" } = options;
  const requested = engines === "all" ? "bing,google,baidu,sogou" : engines;
  // split(",") alone never yields an empty array and keeps stray spaces, which would
  // make unknown names such as " google" silently fall back to the default engine.
  const engineList = requested.split(",").map((name) => name.trim()).filter(Boolean);
  if (engineList.length === 0) {
    throw new Error("engines is required");
  }
  const browserSearch = new BrowserSearch({
    logger: logger2,
    browserOptions: {
      headless: true
    }
  });
  try {
    const results = [];
    for (const engine of engineList) {
      const res = await browserSearch.perform({
        query,
        count: limit,
        engine,
        needVisitedUrls: false
      });
      if (res.length > 0) {
        results.push(...res);
        break;
      }
    }
    logger2.info(`Found ${results.length} results for ${query}`, results);
    return {
      results,
      success: true
    };
  } finally {
    // Make sure no browser instance outlives this call.
    await browserSearch.closeBrowser();
  }
}
152
1331
 
153
1332
  // src/tools.ts
154
1333
  var SEARCH_TOOL = {
@@ -171,16 +1350,69 @@ var SEARCH_TOOL = {
171
1350
  },
172
1351
  categories: {
173
1352
  type: "string",
1353
+ enum: [
1354
+ "general",
1355
+ "news",
1356
+ "images",
1357
+ "videos",
1358
+ "it",
1359
+ "science",
1360
+ "map",
1361
+ "music",
1362
+ "files",
1363
+ "social_media"
1364
+ ],
174
1365
  description: "Categories to search for (default: general)"
175
1366
  },
176
1367
  timeRange: {
177
1368
  type: "string",
178
- description: "Time range for search results (default: all)"
1369
+ description: "Time range for search results (default: all)",
1370
+ enum: [
1371
+ "all",
1372
+ "day",
1373
+ "week",
1374
+ "month",
1375
+ "year"
1376
+ ]
179
1377
  }
180
1378
  },
181
1379
  required: ["query"]
182
1380
  }
183
1381
  };
1382
// Tool descriptor for `one_map`: its name, description and JSON input schema.
// Only `url` is required; every other property is optional.
var MAP_TOOL = (() => {
  // One schema property entry; key order (type, description) matches a hand-written literal.
  const field = (type, description) => ({ type, description });
  const properties = {
    url: field("string", "Starting URL for URL discovery"),
    search: field("string", "Optional search term to filter URLs"),
    ignoreSitemap: field("boolean", "Skip sitemap.xml discovery and only use HTML links"),
    sitemapOnly: field("boolean", "Only use sitemap.xml for discovery, ignore HTML links"),
    includeSubdomains: field("boolean", "Include URLs from subdomains in results"),
    limit: field("number", "Maximum number of URLs to return")
  };
  return {
    name: "one_map",
    description: "Discover URLs from a starting point. Can use both sitemap.xml and HTML link discovery.",
    inputSchema: {
      type: "object",
      properties,
      required: ["url"]
    }
  };
})();
184
1416
  var SCRAPE_TOOL = {
185
1417
  name: "one_scrape",
186
1418
  description: "Scrape a single webpage with advanced options for content extraction. Supports various formats including markdown, HTML, and screenshots. Can execute custom actions like clicking or scrolling before scraping.",
@@ -374,6 +1606,7 @@ var EXTRACT_TOOL = {
374
1606
  // src/index.ts
375
1607
  var import_firecrawl_js = __toESM(require("@mendable/firecrawl-js"), 1);
376
1608
  var import_dotenvx = __toESM(require("@dotenvx/dotenvx"), 1);
1609
+ var import_duck_duck_scrape = require("duck-duck-scrape");
377
1610
  import_dotenvx.default.config();
378
1611
  var SEARCH_API_URL = process.env.SEARCH_API_URL;
379
1612
  var SEARCH_API_KEY = process.env.SEARCH_API_KEY;
@@ -404,7 +1637,7 @@ var server = new import_server.Server(
404
1637
  }
405
1638
  }
406
1639
  );
407
- var searchConfig = {
1640
+ var searchDefaultConfig = {
408
1641
  limit: Number(LIMIT),
409
1642
  categories: CATEGORIES,
410
1643
  format: FORMAT,
@@ -418,7 +1651,8 @@ server.setRequestHandler(import_types.ListToolsRequestSchema, async () => ({
418
1651
  tools: [
419
1652
  SEARCH_TOOL,
420
1653
  EXTRACT_TOOL,
421
- SCRAPE_TOOL
1654
+ SCRAPE_TOOL,
1655
+ MAP_TOOL
422
1656
  ]
423
1657
  }));
424
1658
  server.setRequestHandler(import_types.CallToolRequestSchema, async (request) => {
@@ -439,10 +1673,9 @@ server.setRequestHandler(import_types.CallToolRequestSchema, async (request) =>
439
1673
  }
440
1674
  try {
441
1675
  const { results, success } = await processSearch({
442
- ...searchConfig,
443
1676
  ...args,
444
1677
  apiKey: SEARCH_API_KEY ?? "",
445
- apiUrl: SEARCH_API_URL ?? ""
1678
+ apiUrl: SEARCH_API_URL
446
1679
  });
447
1680
  if (!success) {
448
1681
  throw new Error("Failed to search");
@@ -516,6 +1749,34 @@ ${result.markdown ? `Content: ${result.markdown}` : ""}`);
516
1749
  };
517
1750
  }
518
1751
  }
1752
+ case "one_map": {
1753
+ if (!checkMapArgs(args)) {
1754
+ throw new Error(`Invalid arguments for tool: [${name}]`);
1755
+ }
1756
+ try {
1757
+ const { content, success, result } = await processMapUrl(args.url, args);
1758
+ return {
1759
+ content,
1760
+ result,
1761
+ success
1762
+ };
1763
+ } catch (error) {
1764
+ server.sendLoggingMessage({
1765
+ level: "error",
1766
+ data: `[${(/* @__PURE__ */ new Date()).toISOString()}] Error mapping: ${error}`
1767
+ });
1768
+ const msg = error instanceof Error ? error.message : String(error);
1769
+ return {
1770
+ success: false,
1771
+ content: [
1772
+ {
1773
+ type: "text",
1774
+ text: msg
1775
+ }
1776
+ ]
1777
+ };
1778
+ }
1779
+ }
519
1780
  default: {
520
1781
  throw new Error(`Unknown tool: ${name}`);
521
1782
  }
@@ -550,18 +1811,51 @@ ${result.markdown ? `Content: ${result.markdown}` : ""}`);
550
1811
  });
551
1812
  async function processSearch(args) {
552
1813
  switch (SEARCH_PROVIDER) {
553
- case "searxng":
554
- return await searxngSearch({
555
- ...searchConfig,
1814
+ case "searxng": {
1815
+ const params = {
1816
+ ...searchDefaultConfig,
556
1817
  ...args,
557
1818
  apiKey: SEARCH_API_KEY
558
- });
559
- case "tavily":
1819
+ };
1820
+ const { categories, language } = searchDefaultConfig;
1821
+ if (categories) {
1822
+ params.categories = categories;
1823
+ }
1824
+ if (language) {
1825
+ params.language = language;
1826
+ }
1827
+ return await searxngSearch(params);
1828
+ }
1829
+ case "tavily": {
560
1830
  return await tavilySearch({
561
- ...searchConfig,
1831
+ ...searchDefaultConfig,
562
1832
  ...args,
563
1833
  apiKey: SEARCH_API_KEY
564
1834
  });
1835
+ }
1836
+ case "bing": {
1837
+ return await bingSearch({
1838
+ ...searchDefaultConfig,
1839
+ ...args,
1840
+ apiKey: SEARCH_API_KEY
1841
+ });
1842
+ }
1843
+ case "duckduckgo": {
1844
+ const safeSearch = args.safeSearch ?? 0;
1845
+ const safeSearchOptions = [import_duck_duck_scrape.SafeSearchType.STRICT, import_duck_duck_scrape.SafeSearchType.MODERATE, import_duck_duck_scrape.SafeSearchType.OFF];
1846
+ return await duckDuckGoSearch({
1847
+ ...searchDefaultConfig,
1848
+ ...args,
1849
+ apiKey: SEARCH_API_KEY,
1850
+ safeSearch: safeSearchOptions[safeSearch]
1851
+ });
1852
+ }
1853
+ case "local": {
1854
+ return await localSearch({
1855
+ ...searchDefaultConfig,
1856
+ ...args
1857
+ });
1858
+ }
565
1859
  default:
566
1860
  throw new Error(`Unsupported search provider: ${SEARCH_PROVIDER}`);
567
1861
  }
@@ -603,12 +1897,36 @@ async function processScrape(url2, args) {
603
1897
  success: true
604
1898
  };
605
1899
  }
1900
/**
 * Maps a site through `firecrawl.mapUrl` and packages the discovered links.
 *
 * @param url2 - Starting URL handed to `firecrawl.mapUrl`
 * @param args - Tool arguments; a shallow copy is forwarded as the options object
 * @returns `{ content, result, success: true }` where `content` holds the links as
 *          newline-joined text and `result` is the raw links array
 * @throws Error when the response carries an `error` field or has no `links`
 */
async function processMapUrl(url2, args) {
  const res = await firecrawl.mapUrl(url2, { ...args });
  if ("error" in res) {
    throw new Error(`Failed to map: ${res.error}`);
  }
  const { links } = res;
  if (!links) {
    throw new Error(`No links found from: ${url2}`);
  }
  const text = links.join("\n").trim();
  return {
    content: [{ type: "text", text }],
    result: links,
    success: true
  };
}
606
1921
/**
 * Checks that tool arguments are a non-null object with a string `query`.
 *
 * @param args - Raw tool arguments
 * @returns true when `args.query` exists and is a string, false otherwise
 */
function checkSearchArgs(args) {
  if (args === null || typeof args !== "object") {
    return false;
  }
  if (!("query" in args)) {
    return false;
  }
  return typeof args.query === "string";
}
609
1924
/**
 * Checks that tool arguments are a non-null object with a string `url`.
 *
 * @param args - Raw tool arguments
 * @returns true when `args.url` exists and is a string, false otherwise
 */
function checkScrapeArgs(args) {
  if (args === null || typeof args !== "object") {
    return false;
  }
  if (!("url" in args)) {
    return false;
  }
  return typeof args.url === "string";
}
1927
/**
 * Checks that tool arguments are a non-null object with a string `url`.
 *
 * @param args - Raw tool arguments
 * @returns true when `args.url` exists and is a string, false otherwise
 */
function checkMapArgs(args) {
  if (args === null || typeof args !== "object") {
    return false;
  }
  if (!("url" in args)) {
    return false;
  }
  return typeof args.url === "string";
}
612
1930
  async function runServer() {
613
1931
  try {
614
1932
  process.stdout.write("Starting OneSearch MCP server...\n");