one-search-mcp 1.0.9 → 1.0.11
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +10 -0
- package/dist/index.cjs +14 -1950
- package/dist/index.cjs.map +1 -1
- package/dist/index.js +14 -1924
- package/dist/index.js.map +1 -1
- package/package.json +3 -2
package/dist/index.cjs
CHANGED
|
@@ -1,1952 +1,16 @@
|
|
|
1
1
|
#!/usr/bin/env node
|
|
2
|
-
"use strict";
|
|
3
|
-
var __create = Object.create;
|
|
4
|
-
|
|
5
|
-
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
};
|
|
17
|
-
var __toESM = (mod, isNodeMode, target) => (target = mod != null ? __create(__getProtoOf(mod)) : {}, __copyProps(
|
|
18
|
-
// If the importer is in node compatibility mode or this is not an ESM
|
|
19
|
-
// file that has been converted to a CommonJS file using a Babel-
|
|
20
|
-
// compatible transform (i.e. "__esModule" has not been set), then set
|
|
21
|
-
// "default" to the CommonJS "module.exports" for node compatibility.
|
|
22
|
-
isNodeMode || !mod || !mod.__esModule ? __defProp(target, "default", { value: mod, enumerable: true }) : target,
|
|
23
|
-
mod
|
|
24
|
-
));
|
|
25
|
-
var __toCommonJS = (mod) => __copyProps(__defProp({}, "__esModule", { value: true }), mod);
|
|
26
|
-
|
|
27
|
-
// src/index.ts
|
|
28
|
-
var index_exports = {};
|
|
29
|
-
module.exports = __toCommonJS(index_exports);
|
|
30
|
-
var import_server = require("@modelcontextprotocol/sdk/server/index.js");
|
|
31
|
-
var import_types = require("@modelcontextprotocol/sdk/types.js");
|
|
32
|
-
var import_stdio = require("@modelcontextprotocol/sdk/server/stdio.js");
|
|
33
|
-
|
|
34
|
-
// src/search/bing.ts
|
|
35
|
-
/**
 * Query the Bing Web Search API and normalize the hits into the shared
 * search-result shape.
 *
 * @param {object} options
 * @param {string} options.query - Search terms, sent as `q`.
 * @param {number} [options.limit=10] - Page size, sent as `count`.
 * @param {number} [options.safeSearch=0] - Index into ["Off", "Moderate", "Strict"];
 *   an index outside that list leaves the `safeSearch` parameter out of the request.
 * @param {number} [options.page=1] - 1-based page number, converted to `offset`.
 * @param {string} [options.apiUrl] - Endpoint override.
 * @param {string} options.apiKey - Sent in the `Ocp-Apim-Subscription-Key` header.
 * @param {string} [options.language] - Sent as `mkt`.
 * @returns {Promise<{results: object[], success: boolean}>} Normalized hits; `success` is always true on return.
 * @throws {Error} If the API key is missing, the response status is not OK,
 *   or the request / JSON decoding fails (the original error is rethrown).
 */
async function bingSearch(options) {
  const {
    query,
    limit = 10,
    safeSearch = 0,
    page = 1,
    apiUrl = "https://api.bing.microsoft.com/v7.0/search",
    apiKey,
    language
  } = options;
  if (!apiKey) {
    throw new Error("Bing API key is required");
  }
  const bingSafeSearchOptions = ["Off", "Moderate", "Strict"];
  const searchOptions = {
    q: query,
    count: limit,
    offset: (page - 1) * limit,
    mkt: language,
    safeSearch: bingSafeSearchOptions[safeSearch]
  };
  try {
    const queryParams = new URLSearchParams();
    for (const [key, value] of Object.entries(searchOptions)) {
      // Skip both undefined and null: calling toString() on null used to throw.
      if (value != null) {
        queryParams.set(key, String(value));
      }
    }
    const res = await fetch(`${apiUrl}?${queryParams}`, {
      method: "GET",
      headers: {
        "Content-Type": "application/json",
        "Ocp-Apim-Subscription-Key": apiKey
      }
    });
    if (!res.ok) {
      throw new Error(`Bing search error: ${res.status} ${res.statusText}`);
    }
    const data = await res.json();
    const results = (data?.webPages?.value ?? []).map((item) => ({
      title: item.name,
      snippet: item.snippet,
      url: item.url,
      source: item.siteName,
      thumbnailUrl: item.thumbnailUrl,
      language: item.language,
      image: null,
      video: null,
      engine: "bing"
    }));
    return {
      results,
      success: true
    };
  } catch (err) {
    const msg = err instanceof Error ? err.message : "Bing search error.";
    // stdout carries the MCP JSON-RPC stream when the server runs over stdio,
    // so diagnostics must go to stderr instead.
    process.stderr.write(`${msg}\n`);
    throw err;
  }
}
|
|
88
|
-
|
|
89
|
-
// src/search/duckduckgo.ts
|
|
90
|
-
var DDG = __toESM(require("duck-duck-scrape"), 1);
|
|
91
|
-
var import_async_retry = __toESM(require("async-retry"), 1);
|
|
92
|
-
/**
 * Search DuckDuckGo through `duck-duck-scrape`, retrying via `async-retry`,
 * and normalize the hits into the shared search-result shape.
 *
 * @param {object} options
 * @param {string} options.query - Search terms.
 * @param {number} [options.timeout=10000] - Passed to the scraper as `response_timeout`.
 * @param {*} [options.safeSearch=DDG.SafeSearchType.OFF] - Safe-search level forwarded to the scraper.
 * @param {object} [options.retry={ retries: 3 }] - Options handed to `async-retry`.
 *   Any other keys of `options` are forwarded unchanged to `DDG.search`.
 * @returns {Promise<{results: object[], success: boolean}>} Normalized hits; an
 *   empty list when the scraper yields nothing.
 * @throws Rethrows whatever the scraper / retry wrapper throws.
 */
async function duckDuckGoSearch(options) {
  try {
    const {
      query,
      timeout = 1e4,
      safeSearch = DDG.SafeSearchType.OFF,
      retry = { retries: 3 },
      ...searchOptions
    } = options;
    const res = await (0, import_async_retry.default)(
      () => DDG.search(
        query,
        { ...searchOptions, safeSearch },
        // needle options
        { response_timeout: timeout }
      ),
      retry
    );
    // A falsy response, or one without a results array, is treated as "no hits"
    // instead of crashing on `.map`.
    const hits = res?.results ?? [];
    return {
      results: hits.map((result) => ({
        title: result.title,
        snippet: result.description,
        url: result.url,
        source: result.hostname,
        image: null,
        video: null,
        engine: "duckduckgo"
      })),
      success: true
    };
  } catch (error) {
    const msg = error instanceof Error ? error.message : "DuckDuckGo search error.";
    // stdout carries the MCP JSON-RPC stream when the server runs over stdio,
    // so diagnostics must go to stderr instead.
    process.stderr.write(`${msg}\n`);
    throw error;
  }
}
|
|
134
|
-
|
|
135
|
-
// src/search/searxng.ts
|
|
136
|
-
var import_node_url = __toESM(require("url"), 1);
|
|
137
|
-
/**
 * Query a SearXNG instance and normalize the hits into the shared
 * search-result shape.
 *
 * @param {object} params
 * @param {string} params.query - Search terms, sent as `q`.
 * @param {number} [params.page=1] - Sent as `pageno`.
 * @param {number} [params.limit=10] - Maximum number of hits kept from the response.
 * @param {string} [params.categories="general"]
 * @param {string} [params.engines="all"]
 * @param {number} [params.safeSearch=0] - Sent as `safesearch`.
 * @param {string} [params.format="json"]
 * @param {string} [params.language="auto"]
 * @param {string} [params.timeRange=""] - Sent as `time_range`.
 * @param {number} [params.timeout=10000] - Milliseconds before the request is aborted.
 * @param {string} [params.apiKey] - When set, sent as a Bearer token.
 * @param {string} params.apiUrl - Base URL of the instance; `/search` is appended.
 * @returns {Promise<{results: object[], success: boolean}>} `success` is false
 *   when the response carries no `results` field.
 * @throws {Error} If `apiUrl` is missing, or the request / JSON decoding fails.
 */
async function searxngSearch(params) {
  try {
    const {
      query,
      page = 1,
      limit = 10,
      categories = "general",
      engines = "all",
      safeSearch = 0,
      format = "json",
      language = "auto",
      timeRange = "",
      timeout = 1e4,
      apiKey,
      apiUrl
    } = params;
    if (!apiUrl) {
      throw new Error("SearxNG API URL is required");
    }
    const config = {
      q: query,
      pageno: page,
      categories,
      format,
      safesearch: safeSearch,
      language,
      engines,
      time_range: timeRange
    };
    // Drop trailing slashes so "http://host/" does not become "http://host//search".
    const endpoint = `${String(apiUrl).replace(/\/+$/, "")}/search`;
    const queryParams = import_node_url.default.format({ query: config });
    const headers = {
      "Content-Type": "application/json"
    };
    if (apiKey) {
      headers["Authorization"] = `Bearer ${apiKey}`;
    }
    const controller = new AbortController();
    const timeoutId = setTimeout(() => controller.abort(), Number(timeout));
    let res;
    try {
      res = await fetch(`${endpoint}${queryParams}`, {
        method: "POST",
        headers,
        signal: controller.signal
      });
    } finally {
      // Clear the timer on failure too; otherwise a rejected fetch leaves a
      // pending timer behind for the full timeout.
      clearTimeout(timeoutId);
    }
    const response = await res.json();
    if (!response?.results) {
      return {
        results: [],
        success: false
      };
    }
    const results = response.results.slice(0, limit).map((item) => ({
      title: item.title,
      snippet: item.content,
      url: item.url,
      source: item.source,
      image: item.img_src ? { thumbnail: item.thumbnail_src, src: item.img_src } : null,
      video: item.iframe_src ? { thumbnail: item.thumbnail_src, src: item.iframe_src } : null,
      engine: item.engine
    }));
    return {
      results,
      success: true
    };
  } catch (err) {
    const msg = err instanceof Error ? err.message : "Searxng search error.";
    // stdout carries the MCP JSON-RPC stream when the server runs over stdio,
    // so diagnostics must go to stderr instead.
    process.stderr.write(`${msg}\n`);
    throw err;
  }
}
|
|
219
|
-
|
|
220
|
-
// src/search/tavily.ts
|
|
221
|
-
var import_core = require("@tavily/core");
|
|
222
|
-
/**
 * Search through the Tavily client and normalize the hits into the shared
 * search-result shape.
 *
 * @param {object} options
 * @param {string} options.query - Search terms.
 * @param {number} [options.limit=10] - Sent as `maxResults`.
 * @param {string} [options.categories="general"] - Sent as `topic`.
 * @param {string} [options.timeRange] - Forwarded unchanged.
 * @param {string} options.apiKey - Tavily API key.
 * @returns {Promise<{results: object[], success: boolean}>}
 * @throws {Error} If the API key is missing; client errors are rethrown.
 */
async function tavilySearch(options) {
  const {
    query,
    limit = 10,
    categories = "general",
    timeRange,
    apiKey
  } = options;
  if (!apiKey) {
    throw new Error("Tavily API key is required");
  }
  try {
    const tvly = (0, import_core.tavily)({ apiKey });
    const res = await tvly.search(query, {
      topic: categories,
      timeRange,
      maxResults: limit
    });
    // Treat a response without a results array as "no hits" rather than crashing.
    const results = (res?.results ?? []).map((item) => ({
      title: item.title,
      url: item.url,
      snippet: item.content,
      engine: "tavily"
    }));
    return {
      results,
      success: true
    };
  } catch (error) {
    const msg = error instanceof Error ? error.message : "Tavily search error.";
    // stdout carries the MCP JSON-RPC stream when the server runs over stdio,
    // so diagnostics must go to stderr instead.
    process.stderr.write(`${msg}\n`);
    throw error;
  }
}
|
|
259
|
-
|
|
260
|
-
// src/libs/browser/types.ts
|
|
261
|
-
var import_puppeteer_core = require("puppeteer-core");
|
|
262
|
-
|
|
263
|
-
// src/libs/browser/finder.ts
|
|
264
|
-
var fs = __toESM(require("fs"), 1);
|
|
265
|
-
var path = __toESM(require("path"), 1);
|
|
266
|
-
var os = __toESM(require("os"), 1);
|
|
267
|
-
var import_logger = require("@agent-infra/logger");
|
|
268
|
-
/**
 * Locates an installed Chromium-family browser by probing a fixed list of
 * well-known executable paths for the current platform.
 */
var BrowserFinder = class {
  /**
   * Logger instance for diagnostic output
   */
  logger;
  /**
   * Creates a new BrowserFinder instance
   * @param {Logger} [logger] - Optional custom logger; falls back to the default logger
   */
  constructor(logger) {
    this.logger = logger ?? import_logger.defaultLogger;
  }
  /**
   * Supported browsers with their per-platform executable and user-data paths.
   * Rebuilt on every access so it reflects the current home directory and
   * LOCALAPPDATA value.
   * @returns {Browser[]} Array of browser configurations
   * @private
   */
  get browsers() {
    const HOME_DIR = os.homedir();
    // LOCALAPPDATA can be unset (always on non-Windows hosts); fall back to the
    // conventional location instead of interpolating the string "undefined".
    const LOCAL_APP_DATA = process.env.LOCALAPPDATA ?? path.win32.join(HOME_DIR, "AppData", "Local");
    return [
      {
        name: "Chromium",
        executable: {
          win32: "C:\\Program Files\\Chromium\\Application\\chrome.exe",
          darwin: "/Applications/Chromium.app/Contents/MacOS/Chromium",
          linux: "/usr/bin/chromium"
        },
        userDataDir: {
          win32: `${LOCAL_APP_DATA}\\Chromium\\User Data`,
          darwin: `${HOME_DIR}/Library/Application Support/Chromium`,
          linux: `${HOME_DIR}/.config/chromium`
        }
      },
      {
        name: "Google Chrome",
        executable: {
          win32: "C:\\Program Files\\Google\\Chrome\\Application\\chrome.exe",
          darwin: "/Applications/Google Chrome.app/Contents/MacOS/Google Chrome",
          linux: "/usr/bin/google-chrome"
        },
        userDataDir: {
          win32: `${LOCAL_APP_DATA}\\Google\\Chrome\\User Data`,
          darwin: `${HOME_DIR}/Library/Application Support/Google/Chrome`,
          linux: `${HOME_DIR}/.config/google-chrome`
        }
      },
      {
        name: "Google Chrome Canary",
        executable: {
          win32: "C:\\Program Files\\Google\\Chrome Canary\\Application\\chrome.exe",
          darwin: "/Applications/Google Chrome Canary.app/Contents/MacOS/Google Chrome Canary",
          linux: "/usr/bin/google-chrome-canary"
        },
        userDataDir: {
          win32: `${LOCAL_APP_DATA}\\Google\\Chrome Canary\\User Data`,
          darwin: `${HOME_DIR}/Library/Application Support/Google/Chrome Canary`,
          linux: `${HOME_DIR}/.config/google-chrome-canary`
        }
      }
    ];
  }
  /**
   * Find a specific browser or the first available browser
   * @param {string} [name] - Optional browser name to find
   * @returns {{ executable: string; userDataDir: string }} Browser executable and user data paths
   * @throws {Error} If no supported browser is found or the platform is unsupported
   */
  findBrowser(name) {
    const platform = process.platform;
    this.logger.info("Finding browser on platform:", platform);
    if (platform !== "darwin" && platform !== "win32" && platform !== "linux") {
      const error = new Error(`Unsupported platform: ${platform}`);
      this.logger.error(error.message);
      throw error;
    }
    // With a name, only that entry qualifies; without one, the first installed entry wins.
    const browser = this.browsers.find(
      (b) => (!name || b.name === name) && fs.existsSync(b.executable[platform])
    );
    this.logger.log("browser", browser);
    if (!browser) {
      // The hint names exactly the browsers this finder probes for.
      const error = name ? new Error(`Cannot find browser: ${name}`) : new Error(
        "Cannot find a supported browser on your system. Please install Chromium, Google Chrome, or Google Chrome Canary."
      );
      this.logger.error(error.message);
      throw error;
    }
    const result = {
      executable: browser.executable[platform],
      userDataDir: browser.userDataDir[platform]
    };
    this.logger.success(`Found browser: ${browser.name}`);
    this.logger.info("Browser details:", result);
    return result;
  }
  /**
   * Get browser profiles for a specific browser
   * Reads the "Local State" file in the user-data directory and lists the
   * entries of `profile.info_cache`.
   * @param {string} [browserName] - Optional browser name to get profiles for
   * @returns {Array<{ displayName: string; path: string }>} Profile objects; empty
   *   when the file is missing, unreadable, or has no profile cache
   * @throws {Error} If the browser itself cannot be found (see findBrowser)
   */
  getBrowserProfiles(browserName) {
    const browser = this.findBrowser(browserName);
    try {
      const localState = JSON.parse(
        fs.readFileSync(path.join(browser.userDataDir, "Local State"), "utf8")
      );
      const profileInfo = localState?.profile?.info_cache ?? {};
      return Object.entries(profileInfo).map(([profileName, info]) => ({
        displayName: info.name,
        path: path.join(browser.userDataDir, profileName)
      }));
    } catch (error) {
      // Best effort: an unreadable profile list is not fatal, but record why it is empty.
      this.logger.warn("Failed to read browser profiles:", error);
      return [];
    }
  }
  /**
   * Legacy method for backwards compatibility
   * Finds Chrome browser executable path
   * @deprecated Use findBrowser instead
   * @returns {string | null} Chrome executable path or null if not found
   */
  findChrome() {
    try {
      return this.findBrowser("Google Chrome").executable;
    } catch {
      return null;
    }
  }
};
|
|
401
|
-
|
|
402
|
-
// src/libs/browser/base.ts
|
|
403
|
-
var import_logger2 = require("@agent-infra/logger");
|
|
404
|
-
/**
 * Shared behavior for Puppeteer-backed browsers: tracks the launched browser
 * and the most recently created page, and offers helpers to create pages and
 * evaluate functions inside them. Subclasses are expected to assign
 * `this.browser` when they launch or connect.
 */
var BaseBrowser = class {
  /**
   * The underlying Puppeteer browser instance
   * @protected
   */
  browser = null;
  /**
   * Logger instance for browser-related logging
   * @protected
   */
  logger;
  /**
   * Reference to the currently active browser page
   * @protected
   */
  activePage = null;
  /**
   * Creates an instance of BaseBrowser
   * @param {BaseBrowserOptions} [options] - Configuration options
   */
  constructor(options) {
    this.logger = options?.logger ?? import_logger2.defaultLogger;
    this.logger.info("Browser Options:", options);
  }
  /**
   * Get the underlying Puppeteer browser instance
   * @throws Error if browser is not launched
   * @returns {puppeteer.Browser} Puppeteer browser instance
   */
  getBrowser() {
    if (!this.browser) {
      throw new Error("Browser not launched");
    }
    return this.browser;
  }
  /**
   * Sets up listeners for browser page events
   * Tracks page creation and updates active page reference
   * @protected
   */
  async setupPageListener() {
    if (!this.browser) return;
    this.browser.on("targetcreated", async (target) => {
      try {
        const page = await target.page();
        if (!page) return;
        this.logger.info("New page created:", page.url());
        this.activePage = page;
        // Forget the page once it closes or errors, but only if it is still the tracked one.
        const forget = () => {
          if (this.activePage === page) {
            this.activePage = null;
          }
        };
        page.once("close", forget);
        page.once("error", forget);
      } catch (error) {
        // A rejection inside an event listener would otherwise be unhandled.
        this.logger.warn("Failed to track new page:", error);
      }
    });
  }
  /**
   * Closes the browser instance and cleans up resources
   * @returns {Promise<void>} Promise that resolves when browser is closed
   * @throws {Error} If browser fails to close properly
   */
  async close() {
    this.logger.info("Closing browser");
    try {
      await this.browser?.close();
      this.browser = null;
      // Pages of a closed browser are unusable; do not keep a stale reference.
      this.activePage = null;
      this.logger.success("Browser closed successfully");
    } catch (error) {
      this.logger.error("Failed to close browser:", error);
      throw error;
    }
  }
  /**
   * Creates a new page, navigates to the specified URL, executes a function in the page context, and returns the result
   * This method is inspired and modified from https://github.com/egoist/local-web-search/blob/04608ed09aa103e2fff6402c72ca12edfb692d19/src/browser.ts#L74
   *
   * The page (and the window handle, once obtained) is always released, on
   * success and on failure. Errors raised while releasing them are logged and
   * not rethrown, so they cannot mask the result or the original error.
   * @template T - Type of parameters passed to the page function
   * @template R - Return type of the page function
   * @param {EvaluateOnNewPageOptions<T, R>} options - Configuration options for the page evaluation;
   *   `pageFunctionParams` defaults to an empty list when omitted
   * @returns {Promise<R | null>} Promise resolving to the result of the page function or null
   * @throws {Error} If the browser is not launched, or page creation or evaluation fails
   */
  async evaluateOnNewPage(options) {
    const {
      url: targetUrl,
      pageFunction,
      pageFunctionParams = [],
      beforePageLoad,
      afterPageLoad,
      beforeSendResult,
      waitForOptions
    } = options;
    const page = await this.getBrowser().newPage();
    let windowHandle = null;
    try {
      await beforePageLoad?.(page);
      await page.goto(targetUrl, {
        waitUntil: "networkidle2",
        ...waitForOptions
      });
      await afterPageLoad?.(page);
      windowHandle = await page.evaluateHandle(() => window);
      const result = await page.evaluate(
        pageFunction,
        windowHandle,
        ...pageFunctionParams
      );
      await beforeSendResult?.(page, result);
      return result;
    } finally {
      try {
        await windowHandle?.dispose();
      } catch (error) {
        this.logger.warn("Failed to dispose window handle:", error);
      }
      try {
        await page.close();
      } catch (error) {
        this.logger.warn("Failed to close page:", error);
      }
    }
  }
  /**
   * Creates a new browser page
   * @returns {Promise<Page>} Promise resolving to the newly created page
   * @throws {Error} If browser is not launched or page creation fails
   */
  async createPage() {
    if (!this.browser) {
      this.logger.error("No active browser");
      throw new Error("Browser not launched");
    }
    return this.browser.newPage();
  }
  /**
   * Gets the currently active page or finds an active page if none is currently tracked
   * If the browser reports no pages at all, creates a new page
   * @returns {Promise<Page>} Promise resolving to the active page
   * @throws {Error} If browser is not launched, or pages exist but none of them responds
   */
  async getActivePage() {
    if (!this.browser) {
      throw new Error("Browser not launched");
    }
    if (this.activePage) {
      try {
        // Cheap liveness probe: evaluation throws if the page is gone.
        await this.activePage.evaluate(() => document.readyState);
        return this.activePage;
      } catch (e) {
        this.logger.warn("Active page no longer available:", e);
        this.activePage = null;
      }
    }
    const pages = await this.browser.pages();
    if (pages.length === 0) {
      this.activePage = await this.createPage();
      return this.activePage;
    }
    // Probe from the last entry of the list backwards and keep the first page that responds.
    for (let i = pages.length - 1; i >= 0; i--) {
      const page = pages[i];
      try {
        await page.evaluate(() => document.readyState);
        this.activePage = page;
        return page;
      } catch (e) {
        continue;
      }
    }
    throw new Error("No active page found");
  }
};
|
|
573
|
-
|
|
574
|
-
// src/libs/browser/local.ts
|
|
575
|
-
var puppeteer = __toESM(require("puppeteer-core"), 1);
|
|
576
|
-
/**
 * Browser implementation that launches a locally installed browser through
 * puppeteer-core.
 */
var LocalBrowser = class extends BaseBrowser {
  /**
   * Browser finder instance to detect and locate installed browsers.
   * Shares this instance's logger so a custom logger also receives the
   * finder's diagnostics (field initializers run after the base constructor
   * has assigned `this.logger`).
   * @private
   */
  browserFinder = new BrowserFinder(this.logger);
  /**
   * Launches a local browser instance with specified options
   * Automatically detects installed browsers if no executable path is provided
   * @param {LaunchOptions} options - Configuration options for launching the browser
   * @returns {Promise<void>} Promise that resolves when the browser is successfully launched
   * @throws {Error} If no browser can be found or the browser cannot be launched
   */
  async launch(options = {}) {
    this.logger.info("Launching browser with options:", options);
    // The default parameter only covers `undefined`; tolerate an explicit null too.
    const opts = options ?? {};
    const executablePath = opts.executablePath || this.browserFinder.findBrowser().executable;
    this.logger.info("Using executable path:", executablePath);
    const viewportWidth = opts.defaultViewport?.width ?? 1280;
    const viewportHeight = opts.defaultViewport?.height ?? 800;
    const puppeteerLaunchOptions = {
      executablePath,
      headless: opts.headless ?? false,
      defaultViewport: {
        width: viewportWidth,
        height: viewportHeight
      },
      args: [
        // NOTE(review): --no-sandbox and --disable-web-security weaken browser
        // isolation; confirm they are acceptable for the pages being loaded.
        "--no-sandbox",
        "--mute-audio",
        "--disable-gpu",
        "--disable-http2",
        "--disable-blink-features=AutomationControlled",
        "--disable-infobars",
        "--disable-background-timer-throttling",
        "--disable-popup-blocking",
        "--disable-backgrounding-occluded-windows",
        "--disable-renderer-backgrounding",
        "--disable-window-activation",
        "--disable-focus-on-load",
        // disable default browser check
        "--no-default-browser-check",
        // disable CORS
        "--disable-web-security",
        "--disable-features=IsolateOrigins,site-per-process",
        "--disable-site-isolation-trials",
        // Window is 90px taller than the viewport, presumably to leave room for browser chrome.
        `--window-size=${viewportWidth},${viewportHeight + 90}`,
        opts.proxy ? `--proxy-server=${opts.proxy}` : "",
        opts.profilePath ? `--profile-directory=${opts.profilePath}` : ""
      ].filter(Boolean),
      ignoreDefaultArgs: ["--enable-automation"],
      timeout: opts.timeout ?? 0,
      downloadBehavior: {
        policy: "deny"
      }
    };
    this.logger.info("Launch options:", puppeteerLaunchOptions);
    try {
      this.browser = await puppeteer.launch(puppeteerLaunchOptions);
      await this.setupPageListener();
      this.logger.success("Browser launched successfully");
    } catch (error) {
      this.logger.error("Failed to launch browser:", error);
      throw error;
    }
  }
};
|
|
642
|
-
|
|
643
|
-
// src/libs/browser/remote.ts
|
|
644
|
-
var puppeteer2 = __toESM(require("puppeteer-core"), 1);
|
|
645
|
-
|
|
646
|
-
// src/libs/browser-search/readability.ts
|
|
647
|
-
var READABILITY_SCRIPT = 'function q(t,e){if(e&&e.documentElement)t=e,e=arguments[2];else if(!t||!t.documentElement)throw new Error("First argument to Readability constructor should be a document object.");if(e=e||{},this._doc=t,this._docJSDOMParser=this._doc.firstChild.__JSDOMParser__,this._articleTitle=null,this._articleByline=null,this._articleDir=null,this._articleSiteName=null,this._attempts=[],this._debug=!!e.debug,this._maxElemsToParse=e.maxElemsToParse||this.DEFAULT_MAX_ELEMS_TO_PARSE,this._nbTopCandidates=e.nbTopCandidates||this.DEFAULT_N_TOP_CANDIDATES,this._charThreshold=e.charThreshold||this.DEFAULT_CHAR_THRESHOLD,this._classesToPreserve=this.CLASSES_TO_PRESERVE.concat(e.classesToPreserve||[]),this._keepClasses=!!e.keepClasses,this._serializer=e.serializer||function(i){return i.innerHTML},this._disableJSONLD=!!e.disableJSONLD,this._allowedVideoRegex=e.allowedVideoRegex||this.REGEXPS.videos,this._flags=this.FLAG_STRIP_UNLIKELYS|this.FLAG_WEIGHT_CLASSES|this.FLAG_CLEAN_CONDITIONALLY,this._debug){let i=function(r){if(r.nodeType==r.TEXT_NODE)return`${r.nodeName} ("${r.textContent}")`;let l=Array.from(r.attributes||[],function(a){return`${a.name}="${a.value}"`}).join(" ");return`<${r.localName} ${l}>`};this.log=function(){if(typeof console!="undefined"){let l=Array.from(arguments,a=>a&&a.nodeType==this.ELEMENT_NODE?i(a):a);l.unshift("Reader: (Readability)"),console.log.apply(console,l)}else if(typeof dump!="undefined"){var r=Array.prototype.map.call(arguments,function(l){return l&&l.nodeName?i(l):l}).join(" ");dump("Reader: (Readability) "+r+`\n`)}}}else 
this.log=function(){}}q.prototype={FLAG_STRIP_UNLIKELYS:1,FLAG_WEIGHT_CLASSES:2,FLAG_CLEAN_CONDITIONALLY:4,ELEMENT_NODE:1,TEXT_NODE:3,DEFAULT_MAX_ELEMS_TO_PARSE:0,DEFAULT_N_TOP_CANDIDATES:5,DEFAULT_TAGS_TO_SCORE:"section,h2,h3,h4,h5,h6,p,td,pre".toUpperCase().split(","),DEFAULT_CHAR_THRESHOLD:500,REGEXPS:{unlikelyCandidates:/-ad-|ai2html|banner|breadcrumbs|combx|comment|community|cover-wrap|disqus|extra|footer|gdpr|header|legends|menu|related|remark|replies|rss|shoutbox|sidebar|skyscraper|social|sponsor|supplemental|ad-break|agegate|pagination|pager|popup|yom-remote/i,okMaybeItsACandidate:/and|article|body|column|content|main|shadow/i,positive:/article|body|content|entry|hentry|h-entry|main|page|pagination|post|text|blog|story/i,negative:/-ad-|hidden|^hid$| hid$| hid |^hid |banner|combx|comment|com-|contact|foot|footer|footnote|gdpr|masthead|media|meta|outbrain|promo|related|scroll|share|shoutbox|sidebar|skyscraper|sponsor|shopping|tags|tool|widget/i,extraneous:/print|archive|comment|discuss|e[\\-]?mail|share|reply|all|login|sign|single|utility/i,byline:/byline|author|dateline|writtenby|p-author/i,replaceFonts:/<(\\/?)font[^>]*>/gi,normalize:/\\s{2,}/g,videos:/\\/\\/(www\\.)?((dailymotion|youtube|youtube-nocookie|player\\.vimeo|v\\.qq)\\.com|(archive|upload\\.wikimedia)\\.org|player\\.twitch\\.tv)/i,shareElements:/(\\b|_)(share|sharedaddy)(\\b|_)/i,nextLink:/(next|weiter|continue|>([^\\|]|$)|\xBB([^\\|]|$))/i,prevLink:/(prev|earl|old|new|<|\xAB)/i,tokenize:/\\W+/g,whitespace:/^\\s*$/,hasContent:/\\S$/,hashUrl:/^#.+/,srcsetUrl:/(\\S+)(\\s+[\\d.]+[xw])?(\\s*(?:,|$))/g,b64DataUrl:/^data:\\s*([^\\s;,]+)\\s*;\\s*base64\\s*,/i,commas:/\\u002C|\\u060C|\\uFE50|\\uFE10|\\uFE11|\\u2E41|\\u2E34|\\u2E32|\\uFF0C/g,jsonLdArticleTypes:/^Article|AdvertiserContentArticle|NewsArticle|AnalysisNewsArticle|AskPublicNewsArticle|BackgroundNewsArticle|OpinionNewsArticle|ReportageNewsArticle|ReviewNewsArticle|Report|SatiricalArticle|ScholarlyArticle|MedicalScholarlyArticle|SocialMediaPostin
g|BlogPosting|LiveBlogPosting|DiscussionForumPosting|TechArticle|APIReference$/},UNLIKELY_ROLES:["menu","menubar","complementary","navigation","alert","alertdialog","dialog"],DIV_TO_P_ELEMS:new Set(["BLOCKQUOTE","DL","DIV","IMG","OL","P","PRE","TABLE","UL"]),ALTER_TO_DIV_EXCEPTIONS:["DIV","ARTICLE","SECTION","P"],PRESENTATIONAL_ATTRIBUTES:["align","background","bgcolor","border","cellpadding","cellspacing","frame","hspace","rules","style","valign","vspace"],DEPRECATED_SIZE_ATTRIBUTE_ELEMS:["TABLE","TH","TD","HR","PRE"],PHRASING_ELEMS:["ABBR","AUDIO","B","BDO","BR","BUTTON","CITE","CODE","DATA","DATALIST","DFN","EM","EMBED","I","IMG","INPUT","KBD","LABEL","MARK","MATH","METER","NOSCRIPT","OBJECT","OUTPUT","PROGRESS","Q","RUBY","SAMP","SCRIPT","SELECT","SMALL","SPAN","STRONG","SUB","SUP","TEXTAREA","TIME","VAR","WBR"],CLASSES_TO_PRESERVE:["page"],HTML_ESCAPE_MAP:{lt:"<",gt:">",amp:"&",quot:\'"\',apos:"\'"},_postProcessContent:function(t){this._fixRelativeUris(t),this._simplifyNestedElements(t),this._keepClasses||this._cleanClasses(t)},_removeNodes:function(t,e){if(this._docJSDOMParser&&t._isLiveNodeList)throw new Error("Do not pass live node lists to _removeNodes");for(var i=t.length-1;i>=0;i--){var r=t[i],l=r.parentNode;l&&(!e||e.call(this,r,i,t))&&l.removeChild(r)}},_replaceNodeTags:function(t,e){if(this._docJSDOMParser&&t._isLiveNodeList)throw new Error("Do not pass live node lists to _replaceNodeTags");for(let i of t)this._setNodeTag(i,e)},_forEachNode:function(t,e){Array.prototype.forEach.call(t,e,this)},_findNode:function(t,e){return Array.prototype.find.call(t,e,this)},_someNode:function(t,e){return Array.prototype.some.call(t,e,this)},_everyNode:function(t,e){return Array.prototype.every.call(t,e,this)},_concatNodeLists:function(){var t=Array.prototype.slice,e=t.call(arguments),i=e.map(function(r){return t.call(r)});return Array.prototype.concat.apply([],i)},_getAllNodesWithTag:function(t,e){return 
t.querySelectorAll?t.querySelectorAll(e.join(",")):[].concat.apply([],e.map(function(i){var r=t.getElementsByTagName(i);return Array.isArray(r)?r:Array.from(r)}))},_cleanClasses:function(t){var e=this._classesToPreserve,i=(t.getAttribute("class")||"").split(/\\s+/).filter(function(r){return e.indexOf(r)!=-1}).join(" ");for(i?t.setAttribute("class",i):t.removeAttribute("class"),t=t.firstElementChild;t;t=t.nextElementSibling)this._cleanClasses(t)},_fixRelativeUris:function(t){var e=this._doc.baseURI,i=this._doc.documentURI;function r(s){if(e==i&&s.charAt(0)=="#")return s;try{return new URL(s,e).href}catch(h){}return s}var l=this._getAllNodesWithTag(t,["a"]);this._forEachNode(l,function(s){var h=s.getAttribute("href");if(h)if(h.indexOf("javascript:")===0)if(s.childNodes.length===1&&s.childNodes[0].nodeType===this.TEXT_NODE){var c=this._doc.createTextNode(s.textContent);s.parentNode.replaceChild(c,s)}else{for(var n=this._doc.createElement("span");s.firstChild;)n.appendChild(s.firstChild);s.parentNode.replaceChild(n,s)}else s.setAttribute("href",r(h))});var a=this._getAllNodesWithTag(t,["img","picture","figure","video","audio","source"]);this._forEachNode(a,function(s){var h=s.getAttribute("src"),c=s.getAttribute("poster"),n=s.getAttribute("srcset");if(h&&s.setAttribute("src",r(h)),c&&s.setAttribute("poster",r(c)),n){var u=n.replace(this.REGEXPS.srcsetUrl,function(m,b,N,v){return r(b)+(N||"")+v});s.setAttribute("srcset",u)}})},_simplifyNestedElements:function(t){for(var e=t;e;){if(e.parentNode&&["DIV","SECTION"].includes(e.tagName)&&!(e.id&&e.id.startsWith("readability"))){if(this._isElementWithoutContent(e)){e=this._removeAndGetNext(e);continue}else if(this._hasSingleTagInsideElement(e,"DIV")||this._hasSingleTagInsideElement(e,"SECTION")){for(var i=e.children[0],r=0;r<e.attributes.length;r++)i.setAttribute(e.attributes[r].name,e.attributes[r].value);e.parentNode.replaceChild(i,e),e=i;continue}}e=this._getNextNode(e)}},_getArticleTitle:function(){var 
t=this._doc,e="",i="";try{e=i=t.title.trim(),typeof e!="string"&&(e=i=this._getInnerText(t.getElementsByTagName("title")[0]))}catch(u){}var r=!1;function l(u){return u.split(/\\s+/).length}if(/ [\\|\\-\\\\\\/>\xBB] /.test(e))r=/ [\\\\\\/>\xBB] /.test(e),e=i.replace(/(.*)[\\|\\-\\\\\\/>\xBB] .*/gi,"$1"),l(e)<3&&(e=i.replace(/[^\\|\\-\\\\\\/>\xBB]*[\\|\\-\\\\\\/>\xBB](.*)/gi,"$1"));else if(e.indexOf(": ")!==-1){var a=this._concatNodeLists(t.getElementsByTagName("h1"),t.getElementsByTagName("h2")),s=e.trim(),h=this._someNode(a,function(u){return u.textContent.trim()===s});h||(e=i.substring(i.lastIndexOf(":")+1),l(e)<3?e=i.substring(i.indexOf(":")+1):l(i.substr(0,i.indexOf(":")))>5&&(e=i))}else if(e.length>150||e.length<15){var c=t.getElementsByTagName("h1");c.length===1&&(e=this._getInnerText(c[0]))}e=e.trim().replace(this.REGEXPS.normalize," ");var n=l(e);return n<=4&&(!r||n!=l(i.replace(/[\\|\\-\\\\\\/>\xBB]+/g,""))-1)&&(e=i),e},_prepDocument:function(){var t=this._doc;this._removeNodes(this._getAllNodesWithTag(t,["style"])),t.body&&this._replaceBrs(t.body),this._replaceNodeTags(this._getAllNodesWithTag(t,["font"]),"SPAN")},_nextNode:function(t){for(var e=t;e&&e.nodeType!=this.ELEMENT_NODE&&this.REGEXPS.whitespace.test(e.textContent);)e=e.nextSibling;return e},_replaceBrs:function(t){this._forEachNode(this._getAllNodesWithTag(t,["br"]),function(e){for(var i=e.nextSibling,r=!1;(i=this._nextNode(i))&&i.tagName=="BR";){r=!0;var l=i.nextSibling;i.parentNode.removeChild(i),i=l}if(r){var a=this._doc.createElement("p");for(e.parentNode.replaceChild(a,e),i=a.nextSibling;i;){if(i.tagName=="BR"){var s=this._nextNode(i.nextSibling);if(s&&s.tagName=="BR")break}if(!this._isPhrasingContent(i))break;var h=i.nextSibling;a.appendChild(i),i=h}for(;a.lastChild&&this._isWhitespace(a.lastChild);)a.removeChild(a.lastChild);a.parentNode.tagName==="P"&&this._setNodeTag(a.parentNode,"DIV")}})},_setNodeTag:function(t,e){if(this.log("_setNodeTag",t,e),this._docJSDOMParser)return 
t.localName=e.toLowerCase(),t.tagName=e.toUpperCase(),t;for(var i=t.ownerDocument.createElement(e);t.firstChild;)i.appendChild(t.firstChild);t.parentNode.replaceChild(i,t),t.readability&&(i.readability=t.readability);for(var r=0;r<t.attributes.length;r++)try{i.setAttribute(t.attributes[r].name,t.attributes[r].value)}catch(l){}return i},_prepArticle:function(t){this._cleanStyles(t),this._markDataTables(t),this._fixLazyImages(t),this._cleanConditionally(t,"form"),this._cleanConditionally(t,"fieldset"),this._clean(t,"object"),this._clean(t,"embed"),this._clean(t,"footer"),this._clean(t,"link"),this._clean(t,"aside");var e=this.DEFAULT_CHAR_THRESHOLD;this._forEachNode(t.children,function(i){this._cleanMatchedNodes(i,function(r,l){return this.REGEXPS.shareElements.test(l)&&r.textContent.length<e})}),this._clean(t,"iframe"),this._clean(t,"input"),this._clean(t,"textarea"),this._clean(t,"select"),this._clean(t,"button"),this._cleanHeaders(t),this._cleanConditionally(t,"table"),this._cleanConditionally(t,"ul"),this._cleanConditionally(t,"div"),this._replaceNodeTags(this._getAllNodesWithTag(t,["h1"]),"h2"),this._removeNodes(this._getAllNodesWithTag(t,["p"]),function(i){var r=i.getElementsByTagName("img").length,l=i.getElementsByTagName("embed").length,a=i.getElementsByTagName("object").length,s=i.getElementsByTagName("iframe").length,h=r+l+a+s;return h===0&&!this._getInnerText(i,!1)}),this._forEachNode(this._getAllNodesWithTag(t,["br"]),function(i){var r=this._nextNode(i.nextSibling);r&&r.tagName=="P"&&i.parentNode.removeChild(i)}),this._forEachNode(this._getAllNodesWithTag(t,["table"]),function(i){var r=this._hasSingleTagInsideElement(i,"TBODY")?i.firstElementChild:i;if(this._hasSingleTagInsideElement(r,"TR")){var l=r.firstElementChild;if(this._hasSingleTagInsideElement(l,"TD")){var 
a=l.firstElementChild;a=this._setNodeTag(a,this._everyNode(a.childNodes,this._isPhrasingContent)?"P":"DIV"),i.parentNode.replaceChild(a,i)}}})},_initializeNode:function(t){switch(t.readability={contentScore:0},t.tagName){case"DIV":t.readability.contentScore+=5;break;case"PRE":case"TD":case"BLOCKQUOTE":t.readability.contentScore+=3;break;case"ADDRESS":case"OL":case"UL":case"DL":case"DD":case"DT":case"LI":case"FORM":t.readability.contentScore-=3;break;case"H1":case"H2":case"H3":case"H4":case"H5":case"H6":case"TH":t.readability.contentScore-=5;break}t.readability.contentScore+=this._getClassWeight(t)},_removeAndGetNext:function(t){var e=this._getNextNode(t,!0);return t.parentNode.removeChild(t),e},_getNextNode:function(t,e){if(!e&&t.firstElementChild)return t.firstElementChild;if(t.nextElementSibling)return t.nextElementSibling;do t=t.parentNode;while(t&&!t.nextElementSibling);return t&&t.nextElementSibling},_textSimilarity:function(t,e){var i=t.toLowerCase().split(this.REGEXPS.tokenize).filter(Boolean),r=e.toLowerCase().split(this.REGEXPS.tokenize).filter(Boolean);if(!i.length||!r.length)return 0;var l=r.filter(s=>!i.includes(s)),a=l.join(" ").length/r.join(" ").length;return 1-a},_checkByline:function(t,e){if(this._articleByline)return!1;if(t.getAttribute!==void 0)var i=t.getAttribute("rel"),r=t.getAttribute("itemprop");return(i==="author"||r&&r.indexOf("author")!==-1||this.REGEXPS.byline.test(e))&&this._isValidByline(t.textContent)?(this._articleByline=t.textContent.trim(),!0):!1},_getNodeAncestors:function(t,e){e=e||0;for(var i=0,r=[];t.parentNode&&(r.push(t.parentNode),!(e&&++i===e));)t=t.parentNode;return r},_grabArticle:function(t){this.log("**** grabArticle ****");var e=this._doc,i=t!==null;if(t=t||this._doc.body,!t)return this.log("No body found in document. 
Abort."),null;for(var r=t.innerHTML;;){this.log("Starting grabArticle loop");var l=this._flagIsActive(this.FLAG_STRIP_UNLIKELYS),a=[],s=this._doc.documentElement;let J=!0;for(;s;){s.tagName==="HTML"&&(this._articleLang=s.getAttribute("lang"));var h=s.className+" "+s.id;if(!this._isProbablyVisible(s)){this.log("Removing hidden node - "+h),s=this._removeAndGetNext(s);continue}if(s.getAttribute("aria-modal")=="true"&&s.getAttribute("role")=="dialog"){s=this._removeAndGetNext(s);continue}if(this._checkByline(s,h)){s=this._removeAndGetNext(s);continue}if(J&&this._headerDuplicatesTitle(s)){this.log("Removing header: ",s.textContent.trim(),this._articleTitle.trim()),J=!1,s=this._removeAndGetNext(s);continue}if(l){if(this.REGEXPS.unlikelyCandidates.test(h)&&!this.REGEXPS.okMaybeItsACandidate.test(h)&&!this._hasAncestorTag(s,"table")&&!this._hasAncestorTag(s,"code")&&s.tagName!=="BODY"&&s.tagName!=="A"){this.log("Removing unlikely candidate - "+h),s=this._removeAndGetNext(s);continue}if(this.UNLIKELY_ROLES.includes(s.getAttribute("role"))){this.log("Removing content with role "+s.getAttribute("role")+" - "+h),s=this._removeAndGetNext(s);continue}}if((s.tagName==="DIV"||s.tagName==="SECTION"||s.tagName==="HEADER"||s.tagName==="H1"||s.tagName==="H2"||s.tagName==="H3"||s.tagName==="H4"||s.tagName==="H5"||s.tagName==="H6")&&this._isElementWithoutContent(s)){s=this._removeAndGetNext(s);continue}if(this.DEFAULT_TAGS_TO_SCORE.indexOf(s.tagName)!==-1&&a.push(s),s.tagName==="DIV"){for(var c=null,n=s.firstChild;n;){var u=n.nextSibling;if(this._isPhrasingContent(n))c!==null?c.appendChild(n):this._isWhitespace(n)||(c=e.createElement("p"),s.replaceChild(c,n),c.appendChild(n));else if(c!==null){for(;c.lastChild&&this._isWhitespace(c.lastChild);)c.removeChild(c.lastChild);c=null}n=u}if(this._hasSingleTagInsideElement(s,"P")&&this._getLinkDensity(s)<.25){var m=s.children[0];s.parentNode.replaceChild(m,s),s=m,a.push(s)}else 
this._hasChildBlockElement(s)||(s=this._setNodeTag(s,"P"),a.push(s))}s=this._getNextNode(s)}var b=[];this._forEachNode(a,function(A){if(!(!A.parentNode||typeof A.parentNode.tagName=="undefined")){var T=this._getInnerText(A);if(!(T.length<25)){var K=this._getNodeAncestors(A,5);if(K.length!==0){var C=0;C+=1,C+=T.split(this.REGEXPS.commas).length,C+=Math.min(Math.floor(T.length/100),3),this._forEachNode(K,function(S,F){if(!(!S.tagName||!S.parentNode||typeof S.parentNode.tagName=="undefined")){if(typeof S.readability=="undefined"&&(this._initializeNode(S),b.push(S)),F===0)var X=1;else F===1?X=2:X=F*3;S.readability.contentScore+=C/X}})}}}});for(var N=[],v=0,y=b.length;v<y;v+=1){var E=b[v],d=E.readability.contentScore*(1-this._getLinkDensity(E));E.readability.contentScore=d,this.log("Candidate:",E,"with score "+d);for(var p=0;p<this._nbTopCandidates;p++){var x=N[p];if(!x||d>x.readability.contentScore){N.splice(p,0,E),N.length>this._nbTopCandidates&&N.pop();break}}}var o=N[0]||null,L=!1,g;if(o===null||o.tagName==="BODY"){for(o=e.createElement("DIV"),L=!0;t.firstChild;)this.log("Moving child out:",t.firstChild),o.appendChild(t.firstChild);t.appendChild(o),this._initializeNode(o)}else if(o){for(var I=[],P=1;P<N.length;P++)N[P].readability.contentScore/o.readability.contentScore>=.75&&I.push(this._getNodeAncestors(N[P]));var O=3;if(I.length>=O)for(g=o.parentNode;g.tagName!=="BODY";){for(var G=0,H=0;H<I.length&&G<O;H++)G+=Number(I[H].includes(g));if(G>=O){o=g;break}g=g.parentNode}o.readability||this._initializeNode(o),g=o.parentNode;for(var M=o.readability.contentScore,Q=M/3;g.tagName!=="BODY";){if(!g.readability){g=g.parentNode;continue}var V=g.readability.contentScore;if(V<Q)break;if(V>M){o=g;break}M=g.readability.contentScore,g=g.parentNode}for(g=o.parentNode;g.tagName!="BODY"&&g.children.length==1;)o=g,g=o.parentNode;o.readability||this._initializeNode(o)}var _=e.createElement("DIV");i&&(_.id="readability-content");var 
Z=Math.max(10,o.readability.contentScore*.2);g=o.parentNode;for(var U=g.children,w=0,j=U.length;w<j;w++){var f=U[w],R=!1;if(this.log("Looking at sibling node:",f,f.readability?"with score "+f.readability.contentScore:""),this.log("Sibling has score",f.readability?f.readability.contentScore:"Unknown"),f===o)R=!0;else{var $=0;if(f.className===o.className&&o.className!==""&&($+=o.readability.contentScore*.2),f.readability&&f.readability.contentScore+$>=Z)R=!0;else if(f.nodeName==="P"){var Y=this._getLinkDensity(f),z=this._getInnerText(f),k=z.length;(k>80&&Y<.25||k<80&&k>0&&Y===0&&z.search(/\\.( |$)/)!==-1)&&(R=!0)}}R&&(this.log("Appending node:",f),this.ALTER_TO_DIV_EXCEPTIONS.indexOf(f.nodeName)===-1&&(this.log("Altering sibling:",f,"to div."),f=this._setNodeTag(f,"DIV")),_.appendChild(f),U=g.children,w-=1,j-=1)}if(this._debug&&this.log("Article content pre-prep: "+_.innerHTML),this._prepArticle(_),this._debug&&this.log("Article content post-prep: "+_.innerHTML),L)o.id="readability-page-1",o.className="page";else{var B=e.createElement("DIV");for(B.id="readability-page-1",B.className="page";_.firstChild;)B.appendChild(_.firstChild);_.appendChild(B)}this._debug&&this.log("Article content after paging: "+_.innerHTML);var W=!0,D=this._getInnerText(_,!0).length;if(D<this._charThreshold)if(W=!1,t.innerHTML=r,this._flagIsActive(this.FLAG_STRIP_UNLIKELYS))this._removeFlag(this.FLAG_STRIP_UNLIKELYS),this._attempts.push({articleContent:_,textLength:D});else if(this._flagIsActive(this.FLAG_WEIGHT_CLASSES))this._removeFlag(this.FLAG_WEIGHT_CLASSES),this._attempts.push({articleContent:_,textLength:D});else if(this._flagIsActive(this.FLAG_CLEAN_CONDITIONALLY))this._removeFlag(this.FLAG_CLEAN_CONDITIONALLY),this._attempts.push({articleContent:_,textLength:D});else{if(this._attempts.push({articleContent:_,textLength:D}),this._attempts.sort(function(A,T){return T.textLength-A.textLength}),!this._attempts[0].textLength)return null;_=this._attempts[0].articleContent,W=!0}if(W){var 
tt=[g,o].concat(this._getNodeAncestors(g));return this._someNode(tt,function(A){if(!A.tagName)return!1;var T=A.getAttribute("dir");return T?(this._articleDir=T,!0):!1}),_}}},_isValidByline:function(t){return typeof t=="string"||t instanceof String?(t=t.trim(),t.length>0&&t.length<100):!1},_unescapeHtmlEntities:function(t){if(!t)return t;var e=this.HTML_ESCAPE_MAP;return t.replace(/&(quot|amp|apos|lt|gt);/g,function(i,r){return e[r]}).replace(/&#(?:x([0-9a-z]{1,4})|([0-9]{1,4}));/gi,function(i,r,l){var a=parseInt(r||l,r?16:10);return String.fromCharCode(a)})},_getJSONLD:function(t){var e=this._getAllNodesWithTag(t,["script"]),i;return this._forEachNode(e,function(r){if(!i&&r.getAttribute("type")==="application/ld+json")try{var l=r.textContent.replace(/^\\s*<!\\[CDATA\\[|\\]\\]>\\s*$/g,""),a=JSON.parse(l);if(!a["@context"]||!a["@context"].match(/^https?\\:\\/\\/schema\\.org$/)||(!a["@type"]&&Array.isArray(a["@graph"])&&(a=a["@graph"].find(function(n){return(n["@type"]||"").match(this.REGEXPS.jsonLdArticleTypes)})),!a||!a["@type"]||!a["@type"].match(this.REGEXPS.jsonLdArticleTypes)))return;if(i={},typeof a.name=="string"&&typeof a.headline=="string"&&a.name!==a.headline){var s=this._getArticleTitle(),h=this._textSimilarity(a.name,s)>.75,c=this._textSimilarity(a.headline,s)>.75;c&&!h?i.title=a.headline:i.title=a.name}else typeof a.name=="string"?i.title=a.name.trim():typeof a.headline=="string"&&(i.title=a.headline.trim());a.author&&(typeof a.author.name=="string"?i.byline=a.author.name.trim():Array.isArray(a.author)&&a.author[0]&&typeof a.author[0].name=="string"&&(i.byline=a.author.filter(function(n){return n&&typeof n.name=="string"}).map(function(n){return n.name.trim()}).join(", "))),typeof a.description=="string"&&(i.excerpt=a.description.trim()),a.publisher&&typeof a.publisher.name=="string"&&(i.siteName=a.publisher.name.trim()),typeof 
a.datePublished=="string"&&(i.datePublished=a.datePublished.trim());return}catch(n){this.log(n.message)}}),i||{}},_getArticleMetadata:function(t){var e={},i={},r=this._doc.getElementsByTagName("meta"),l=/\\s*(article|dc|dcterm|og|twitter)\\s*:\\s*(author|creator|description|published_time|title|site_name)\\s*/gi,a=/^\\s*(?:(dc|dcterm|og|twitter|weibo:(article|webpage))\\s*[\\.:]\\s*)?(author|creator|description|title|site_name)\\s*$/i;return this._forEachNode(r,function(s){var h=s.getAttribute("name"),c=s.getAttribute("property"),n=s.getAttribute("content");if(n){var u=null,m=null;c&&(u=c.match(l),u&&(m=u[0].toLowerCase().replace(/\\s/g,""),i[m]=n.trim())),!u&&h&&a.test(h)&&(m=h,n&&(m=m.toLowerCase().replace(/\\s/g,"").replace(/\\./g,":"),i[m]=n.trim()))}}),e.title=t.title||i["dc:title"]||i["dcterm:title"]||i["og:title"]||i["weibo:article:title"]||i["weibo:webpage:title"]||i.title||i["twitter:title"],e.title||(e.title=this._getArticleTitle()),e.byline=t.byline||i["dc:creator"]||i["dcterm:creator"]||i.author,e.excerpt=t.excerpt||i["dc:description"]||i["dcterm:description"]||i["og:description"]||i["weibo:article:description"]||i["weibo:webpage:description"]||i.description||i["twitter:description"],e.siteName=t.siteName||i["og:site_name"],e.publishedTime=t.datePublished||i["article:published_time"]||null,e.title=this._unescapeHtmlEntities(e.title),e.byline=this._unescapeHtmlEntities(e.byline),e.excerpt=this._unescapeHtmlEntities(e.excerpt),e.siteName=this._unescapeHtmlEntities(e.siteName),e.publishedTime=this._unescapeHtmlEntities(e.publishedTime),e},_isSingleImage:function(t){return t.tagName==="IMG"?!0:t.children.length!==1||t.textContent.trim()!==""?!1:this._isSingleImage(t.children[0])},_unwrapNoscriptImages:function(t){var e=Array.from(t.getElementsByTagName("img"));this._forEachNode(e,function(r){for(var l=0;l<r.attributes.length;l++){var 
a=r.attributes[l];switch(a.name){case"src":case"srcset":case"data-src":case"data-srcset":return}if(/\\.(jpg|jpeg|png|webp)/i.test(a.value))return}r.parentNode.removeChild(r)});var i=Array.from(t.getElementsByTagName("noscript"));this._forEachNode(i,function(r){var l=t.createElement("div");if(l.innerHTML=r.innerHTML,!!this._isSingleImage(l)){var a=r.previousElementSibling;if(a&&this._isSingleImage(a)){var s=a;s.tagName!=="IMG"&&(s=a.getElementsByTagName("img")[0]);for(var h=l.getElementsByTagName("img")[0],c=0;c<s.attributes.length;c++){var n=s.attributes[c];if(n.value!==""&&(n.name==="src"||n.name==="srcset"||/\\.(jpg|jpeg|png|webp)/i.test(n.value))){if(h.getAttribute(n.name)===n.value)continue;var u=n.name;h.hasAttribute(u)&&(u="data-old-"+u),h.setAttribute(u,n.value)}}r.parentNode.replaceChild(l.firstElementChild,a)}}})},_removeScripts:function(t){this._removeNodes(this._getAllNodesWithTag(t,["script","noscript"]))},_hasSingleTagInsideElement:function(t,e){return t.children.length!=1||t.children[0].tagName!==e?!1:!this._someNode(t.childNodes,function(i){return i.nodeType===this.TEXT_NODE&&this.REGEXPS.hasContent.test(i.textContent)})},_isElementWithoutContent:function(t){return t.nodeType===this.ELEMENT_NODE&&t.textContent.trim().length==0&&(t.children.length==0||t.children.length==t.getElementsByTagName("br").length+t.getElementsByTagName("hr").length)},_hasChildBlockElement:function(t){return this._someNode(t.childNodes,function(e){return this.DIV_TO_P_ELEMS.has(e.tagName)||this._hasChildBlockElement(e)})},_isPhrasingContent:function(t){return t.nodeType===this.TEXT_NODE||this.PHRASING_ELEMS.indexOf(t.tagName)!==-1||(t.tagName==="A"||t.tagName==="DEL"||t.tagName==="INS")&&this._everyNode(t.childNodes,this._isPhrasingContent)},_isWhitespace:function(t){return t.nodeType===this.TEXT_NODE&&t.textContent.trim().length===0||t.nodeType===this.ELEMENT_NODE&&t.tagName==="BR"},_getInnerText:function(t,e){e=typeof e=="undefined"?!0:e;var i=t.textContent.trim();return 
e?i.replace(this.REGEXPS.normalize," "):i},_getCharCount:function(t,e){return e=e||",",this._getInnerText(t).split(e).length-1},_cleanStyles:function(t){if(!(!t||t.tagName.toLowerCase()==="svg")){for(var e=0;e<this.PRESENTATIONAL_ATTRIBUTES.length;e++)t.removeAttribute(this.PRESENTATIONAL_ATTRIBUTES[e]);this.DEPRECATED_SIZE_ATTRIBUTE_ELEMS.indexOf(t.tagName)!==-1&&(t.removeAttribute("width"),t.removeAttribute("height"));for(var i=t.firstElementChild;i!==null;)this._cleanStyles(i),i=i.nextElementSibling}},_getLinkDensity:function(t){var e=this._getInnerText(t).length;if(e===0)return 0;var i=0;return this._forEachNode(t.getElementsByTagName("a"),function(r){var l=r.getAttribute("href"),a=l&&this.REGEXPS.hashUrl.test(l)?.3:1;i+=this._getInnerText(r).length*a}),i/e},_getClassWeight:function(t){if(!this._flagIsActive(this.FLAG_WEIGHT_CLASSES))return 0;var e=0;return typeof t.className=="string"&&t.className!==""&&(this.REGEXPS.negative.test(t.className)&&(e-=25),this.REGEXPS.positive.test(t.className)&&(e+=25)),typeof t.id=="string"&&t.id!==""&&(this.REGEXPS.negative.test(t.id)&&(e-=25),this.REGEXPS.positive.test(t.id)&&(e+=25)),e},_clean:function(t,e){var i=["object","embed","iframe"].indexOf(e)!==-1;this._removeNodes(this._getAllNodesWithTag(t,[e]),function(r){if(i){for(var l=0;l<r.attributes.length;l++)if(this._allowedVideoRegex.test(r.attributes[l].value))return!1;if(r.tagName==="object"&&this._allowedVideoRegex.test(r.innerHTML))return!1}return!0})},_hasAncestorTag:function(t,e,i,r){i=i||3,e=e.toUpperCase();for(var l=0;t.parentNode;){if(i>0&&l>i)return!1;if(t.parentNode.tagName===e&&(!r||r(t.parentNode)))return!0;t=t.parentNode,l++}return!1},_getRowAndColumnCount:function(t){for(var e=0,i=0,r=t.getElementsByTagName("tr"),l=0;l<r.length;l++){var a=r[l].getAttribute("rowspan")||0;a&&(a=parseInt(a,10)),e+=a||1;for(var s=0,h=r[l].getElementsByTagName("td"),c=0;c<h.length;c++){var 
n=h[c].getAttribute("colspan")||0;n&&(n=parseInt(n,10)),s+=n||1}i=Math.max(i,s)}return{rows:e,columns:i}},_markDataTables:function(t){for(var e=t.getElementsByTagName("table"),i=0;i<e.length;i++){var r=e[i],l=r.getAttribute("role");if(l=="presentation"){r._readabilityDataTable=!1;continue}var a=r.getAttribute("datatable");if(a=="0"){r._readabilityDataTable=!1;continue}var s=r.getAttribute("summary");if(s){r._readabilityDataTable=!0;continue}var h=r.getElementsByTagName("caption")[0];if(h&&h.childNodes.length>0){r._readabilityDataTable=!0;continue}var c=["col","colgroup","tfoot","thead","th"],n=function(m){return!!r.getElementsByTagName(m)[0]};if(c.some(n)){this.log("Data table because found data-y descendant"),r._readabilityDataTable=!0;continue}if(r.getElementsByTagName("table")[0]){r._readabilityDataTable=!1;continue}var u=this._getRowAndColumnCount(r);if(u.rows>=10||u.columns>4){r._readabilityDataTable=!0;continue}r._readabilityDataTable=u.rows*u.columns>10}},_fixLazyImages:function(t){this._forEachNode(this._getAllNodesWithTag(t,["img","picture","figure"]),function(e){if(e.src&&this.REGEXPS.b64DataUrl.test(e.src)){var i=this.REGEXPS.b64DataUrl.exec(e.src);if(i[1]==="image/svg+xml")return;for(var r=!1,l=0;l<e.attributes.length;l++){var a=e.attributes[l];if(a.name!=="src"&&/\\.(jpg|jpeg|png|webp)/i.test(a.value)){r=!0;break}}if(r){var s=e.src.search(/base64\\s*/i)+7,h=e.src.length-s;h<133&&e.removeAttribute("src")}}if(!((e.src||e.srcset&&e.srcset!="null")&&e.className.toLowerCase().indexOf("lazy")===-1)){for(var c=0;c<e.attributes.length;c++)if(a=e.attributes[c],!(a.name==="src"||a.name==="srcset"||a.name==="alt")){var n=null;if(/\\.(jpg|jpeg|png|webp)\\s+\\d/.test(a.value)?n="srcset":/^\\s*\\S+\\.(jpg|jpeg|png|webp)\\S*\\s*$/.test(a.value)&&(n="src"),n){if(e.tagName==="IMG"||e.tagName==="PICTURE")e.setAttribute(n,a.value);else if(e.tagName==="FIGURE"&&!this._getAllNodesWithTag(e,["img","picture"]).length){var 
u=this._doc.createElement("img");u.setAttribute(n,a.value),e.appendChild(u)}}}}})},_getTextDensity:function(t,e){var i=this._getInnerText(t,!0).length;if(i===0)return 0;var r=0,l=this._getAllNodesWithTag(t,e);return this._forEachNode(l,a=>r+=this._getInnerText(a,!0).length),r/i},_cleanConditionally:function(t,e){this._flagIsActive(this.FLAG_CLEAN_CONDITIONALLY)&&this._removeNodes(this._getAllNodesWithTag(t,[e]),function(i){var r=function(g){return g._readabilityDataTable},l=e==="ul"||e==="ol";if(!l){var a=0,s=this._getAllNodesWithTag(i,["ul","ol"]);this._forEachNode(s,g=>a+=this._getInnerText(g).length),l=a/this._getInnerText(i).length>.9}if(e==="table"&&r(i)||this._hasAncestorTag(i,"table",-1,r)||this._hasAncestorTag(i,"code"))return!1;var h=this._getClassWeight(i);this.log("Cleaning Conditionally",i);var c=0;if(h+c<0)return!0;if(this._getCharCount(i,",")<10){for(var n=i.getElementsByTagName("p").length,u=i.getElementsByTagName("img").length,m=i.getElementsByTagName("li").length-100,b=i.getElementsByTagName("input").length,N=this._getTextDensity(i,["h1","h2","h3","h4","h5","h6"]),v=0,y=this._getAllNodesWithTag(i,["object","embed","iframe"]),E=0;E<y.length;E++){for(var d=0;d<y[E].attributes.length;d++)if(this._allowedVideoRegex.test(y[E].attributes[d].value))return!1;if(y[E].tagName==="object"&&this._allowedVideoRegex.test(y[E].innerHTML))return!1;v++}var p=this._getLinkDensity(i),x=this._getInnerText(i).length,o=u>1&&n/u<.5&&!this._hasAncestorTag(i,"figure")||!l&&m>n||b>Math.floor(n/3)||!l&&N<.9&&x<25&&(u===0||u>2)&&!this._hasAncestorTag(i,"figure")||!l&&h<25&&p>.2||h>=25&&p>.5||v===1&&x<75||v>1;if(l&&o){for(var L=0;L<i.children.length;L++)if(i.children[L].children.length>1)return o;let g=i.getElementsByTagName("li").length;if(u==g)return!1}return o}return!1})},_cleanMatchedNodes:function(t,e){for(var i=this._getNextNode(t,!0),r=this._getNextNode(t);r&&r!=i;)e.call(this,r,r.className+" 
"+r.id)?r=this._removeAndGetNext(r):r=this._getNextNode(r)},_cleanHeaders:function(t){let e=this._getAllNodesWithTag(t,["h1","h2"]);this._removeNodes(e,function(i){let r=this._getClassWeight(i)<0;return r&&this.log("Removing header with low class weight:",i),r})},_headerDuplicatesTitle:function(t){if(t.tagName!="H1"&&t.tagName!="H2")return!1;var e=this._getInnerText(t,!1);return this.log("Evaluating similarity of header:",e,this._articleTitle),this._textSimilarity(this._articleTitle,e)>.75},_flagIsActive:function(t){return(this._flags&t)>0},_removeFlag:function(t){this._flags=this._flags&~t},_isProbablyVisible:function(t){return(!t.style||t.style.display!="none")&&(!t.style||t.style.visibility!="hidden")&&!t.hasAttribute("hidden")&&(!t.hasAttribute("aria-hidden")||t.getAttribute("aria-hidden")!="true"||t.className&&t.className.indexOf&&t.className.indexOf("fallback-image")!==-1)},parse:function(){if(this._maxElemsToParse>0){var t=this._doc.getElementsByTagName("*").length;if(t>this._maxElemsToParse)throw new Error("Aborting parsing document; "+t+" elements found")}this._unwrapNoscriptImages(this._doc);var e=this._disableJSONLD?{}:this._getJSONLD(this._doc);this._removeScripts(this._doc),this._prepDocument();var i=this._getArticleMetadata(e);this._articleTitle=i.title;var r=this._grabArticle();if(!r)return null;if(this.log("Grabbed: "+r.innerHTML),this._postProcessContent(r),!i.excerpt){var l=r.getElementsByTagName("p");l.length>0&&(i.excerpt=l[0].textContent.trim())}var a=r.textContent;return{title:this._articleTitle,byline:i.byline||this._articleByline,dir:this._articleDir,lang:this._articleLang,content:this._serializer(r),textContent:a,length:a.length,excerpt:i.excerpt,siteName:i.siteName||this._articleSiteName,publishedTime:i.publishedTime}}};typeof module=="object"&&(module.exports=q);\n';
|
|
648
|
-
|
|
649
|
-
// src/libs/browser-search/search.ts
|
|
650
|
-
var import_logger4 = require("@agent-infra/logger");
|
|
651
|
-
|
|
652
|
-
// src/libs/browser-search/utils.ts
|
|
653
|
-
var import_turndown = __toESM(require("turndown"), 1);
|
|
654
|
-
var import_turndown_plugin_gfm = require("turndown-plugin-gfm");
|
|
655
|
-
var import_logger3 = require("@agent-infra/logger");
|
|
656
|
-
var import_user_agents = __toESM(require("user-agents"), 1);
|
|
657
|
-
/**
 * Parses a string into a URL object without throwing.
 *
 * @param {string} url2 - Candidate URL text.
 * @returns {URL | null} The parsed URL, or null when the URL constructor
 *   rejects the input.
 */
var parseUrl = (url2) => {
  let parsed = null;
  try {
    parsed = new URL(url2);
  } catch {
    // Malformed input: keep the null result instead of propagating.
  }
  return parsed;
};
|
|
664
|
-
/**
 * Decides whether a search hit should be dropped based on its host.
 *
 * Unparseable URLs are skipped as well. Only exact hostname matches count;
 * other subdomains of the listed sites are not matched.
 *
 * @param {string} url2 - URL of the search hit.
 * @returns {boolean} true when the URL is invalid or its hostname is on the
 *   skip list.
 */
var shouldSkipDomain = (url2) => {
  const parsed = parseUrl(url2);
  if (!parsed) {
    return true;
  }
  switch (parsed.hostname) {
    case "reddit.com":
    case "www.reddit.com":
    case "x.com":
    case "twitter.com":
    case "www.twitter.com":
    case "youtube.com":
    case "www.youtube.com":
      return true;
    default:
      return false;
  }
};
|
|
678
|
-
/**
 * Makes a page look less like an automated browser: bypasses CSP, sets a
 * random desktop user agent and overrides a few `navigator` properties that
 * headless-detection scripts commonly probe.
 *
 * @param page - The Puppeteer page object to prepare.
 * @returns Promise that resolves once the settings and the init script are
 *   registered.
 */
async function applyStealthScripts(page) {
  const userAgent = new import_user_agents.default({
    deviceCategory: "desktop"
  }).toString();
  await page.setBypassCSP(true);
  await page.setUserAgent(userAgent);
  // Register the overrides as an init script instead of evaluating them once.
  // This function is reached through the `beforePageLoad` hooks (via
  // interceptRequest), i.e. presumably before the target URL is loaded; a
  // plain `page.evaluate` would only patch the document that is current at
  // that moment, and the patch would not carry over to the navigated page.
  await page.evaluateOnNewDocument(() => {
    Object.defineProperty(navigator, "webdriver", {
      get: () => void 0
    });
    Object.defineProperty(navigator, "languages", {
      get: () => ["en-US", "en"]
    });
    Object.defineProperty(navigator, "plugins", {
      get: () => [{}, {}, {}, {}, {}]
    });
    Object.defineProperty(navigator, "headless", {
      get: () => false
    });
    const permissions = window.navigator.permissions;
    if (!permissions) return;
    // Keep the receiver bound: calling the detached native method would throw
    // an "Illegal invocation" TypeError for every non-notification query.
    const originalQuery = permissions.query.bind(permissions);
    permissions.query = (parameters) => parameters.name === "notifications" ? Promise.resolve({
      state: Notification.permission
    }) : originalQuery(parameters);
  });
}
|
|
703
|
-
/**
 * Applies the stealth setup to a page and then restricts its network
 * traffic to document navigations.
 *
 * Every intercepted request whose resource type is not "document", or that
 * is not a navigation request, is aborted; only document navigations are
 * allowed to continue.
 *
 * @param page - The Puppeteer page object to configure.
 * @returns Promise that resolves once interception is enabled and the
 *   request handler is attached.
 */
async function interceptRequest(page) {
  await applyStealthScripts(page);
  await page.setRequestInterception(true);
  const handleRequest = (request) => {
    const isDocument = request.resourceType() === "document";
    // Short-circuit keeps isNavigationRequest() from being queried for
    // non-document resources.
    if (isDocument && request.isNavigationRequest()) {
      return request.continue();
    }
    return request.abort();
  };
  page.on("request", handleRequest);
}
|
|
717
|
-
/**
 * Extracts the readable article content and title from a loaded page.
 *
 * BrowserSearch.visitLink passes this function as a `pageFunction`, with the
 * bundled READABILITY_SCRIPT as its only parameter, so it must stay
 * self-contained and must not reference anything from the surrounding module.
 *
 * @param window2 - Window object of the loaded page.
 * @param readabilityScript - Source text of a CommonJS-style script that
 *   assigns the Readability constructor to `module.exports`.
 * @returns {{content: string, title: string}} Article HTML (empty string when
 *   nothing could be parsed) and the article title, falling back to the
 *   document title.
 */
function extractPageInformation(window2, readabilityScript) {
  // The script arrives as text, so it is materialised with `new Function`
  // and handed a fresh `module` object to export into.
  const loadReadability = new Function(
    "module",
    `${readabilityScript}\nreturn module.exports`
  );
  const Readability = loadReadability({});
  const document2 = window2.document;
  const noise = document2.querySelectorAll(
    "script,noscript,style,link,svg,img,video,iframe,canvas,.reflist"
  );
  // Strip non-textual / boilerplate nodes before handing the DOM to the parser.
  for (const el of noise) {
    el.remove();
  }
  const article = new Readability(document2).parse();
  const fallbackTitle = document2.title;
  return {
    content: article?.content || "",
    title: article?.title || fallbackTitle
  };
}
|
|
735
|
-
/**
 * Converts an HTML string to Markdown with Turndown.
 *
 * @param {string} html - HTML to convert; falsy input yields "".
 * @param {object} [options] - Conversion options.
 * @param {string} [options.codeBlockStyle="fenced"]
 * @param {string} [options.headingStyle="atx"]
 * @param {string} [options.emDelimiter="*"]
 * @param {string} [options.strongDelimiter="**"]
 * @param {boolean} [options.gfmExtension=true] - Whether to enable the GFM
 *   plugin.
 * @returns {string} The Markdown text, or the original HTML when conversion
 *   throws (the error is logged, not rethrown).
 */
function toMarkdown(html, options = {}) {
  if (!html) {
    return "";
  }
  // Mirrors destructuring defaults: only `undefined` triggers the fallback.
  const pick = (value, fallback) => (value === undefined ? fallback : value);
  try {
    const turndownOptions = {
      codeBlockStyle: pick(options.codeBlockStyle, "fenced"),
      headingStyle: pick(options.headingStyle, "atx"),
      emDelimiter: pick(options.emDelimiter, "*"),
      strongDelimiter: pick(options.strongDelimiter, "**")
    };
    const useGfm = pick(options.gfmExtension, true);
    const service = new import_turndown.default(turndownOptions);
    if (useGfm) {
      service.use(import_turndown_plugin_gfm.gfm);
    }
    return service.turndown(html);
  } catch (error) {
    import_logger3.defaultLogger.error("Error converting HTML to Markdown:", error);
    // Best effort: hand back the untouched HTML rather than failing the caller.
    return html;
  }
}
|
|
760
|
-
|
|
761
|
-
// src/libs/browser-search/queue.ts
|
|
762
|
-
/**
 * Minimal task queue that runs at most `concurrency` async tasks at a time.
 *
 * Each task's outcome is delivered through the promise returned by `add`;
 * fulfilled values are additionally collected in `results` in completion
 * order.
 */
var PromiseQueue = class {
  queue = [];
  concurrency;
  running = 0;
  results = [];
  /**
   * @param {number} [concurrency=1] - Maximum number of tasks in flight.
   */
  constructor(concurrency = 1) {
    this.concurrency = concurrency;
  }
  /**
   * Enqueues a task and tries to start it right away.
   *
   * @param {() => Promise<any>} task - Function producing the work.
   * @returns {Promise<any>} Settles with the task's own result or error.
   */
  add(task) {
    return new Promise((resolve, reject) => {
      const job = async () => {
        let value;
        try {
          value = await task();
        } catch (error) {
          reject(error);
          // Rethrow so run() also sees the failure and skips `results`.
          throw error;
        }
        resolve(value);
        return value;
      };
      this.queue.push(job);
      this.run();
    });
  }
  /**
   * Starts the next queued job if a slot is free, and chains to the following
   * job once it finishes.
   */
  async run() {
    const saturated = this.running >= this.concurrency;
    if (saturated || this.queue.length === 0) {
      return;
    }
    this.running += 1;
    const job = this.queue.shift();
    try {
      const value = await job();
      this.results.push(value);
    } catch {
      // Already forwarded to the caller through the promise from add().
    } finally {
      this.running -= 1;
      this.run();
    }
  }
  /**
   * Waits (polling every 100 ms) until nothing is running or queued.
   *
   * @returns {Promise<any[]>} Values of all fulfilled tasks so far.
   */
  async waitAll() {
    const idle = () => this.running <= 0 && this.queue.length <= 0;
    while (!idle()) {
      await new Promise((resolve) => setTimeout(resolve, 100));
    }
    return this.results;
  }
};
|
|
807
|
-
|
|
808
|
-
// src/libs/browser-search/engines/bing.ts
|
|
809
|
-
var BingSearchEngine = class {
  /**
   * Builds the Bing results URL for a query.
   *
   * @param query - The search query string
   * @param options - Search configuration options
   * @param options.count - Number of results to request (default: 10)
   * @param options.excludeDomains - Domains to exclude via `-site:` operators
   * @returns The Bing search URL as a string
   */
  getSearchUrl(query, options) {
    let exclusions = "";
    if (options.excludeDomains && options.excludeDomains.length > 0) {
      exclusions = `${options.excludeDomains.map((domain) => `-site:${domain}`).join(" ")} `;
    }
    const searchParams = new URLSearchParams();
    searchParams.set("q", `${exclusions}${query}`);
    searchParams.set("count", `${options.count || 10}`);
    return `https://www.bing.com/search?${searchParams.toString()}`;
  }
  /**
   * Collects title, URL and snippet of every `.b_algo` result on a Bing page.
   *
   * BrowserSearch.search hands this method over as a `pageFunction`, so it is
   * kept self-contained (no `this`, no module-level helpers).
   *
   * @param window2 - Window object of the loaded Bing results page
   * @returns Array of results; entries without a title or a valid URL are dropped
   * @throws Rethrows any error raised while reading the page (after logging it)
   */
  extractSearchResults(window2) {
    const document2 = window2.document;
    const links = [];
    const isValidUrl = (candidate) => {
      try {
        new URL(candidate);
      } catch {
        return false;
      }
      return true;
    };
    // Snippet = de-duplicated text of the result minus heading, attribution
    // and script/style nodes. Work on a clone so the page itself is untouched.
    const extractSnippet = (element) => {
      const clone = element.cloneNode(true);
      for (const selector of ["h2", ".b_attribution", "script, style"]) {
        clone.querySelectorAll(selector).forEach((el) => el.remove());
      }
      const fragments = [];
      for (const node of Array.from(clone.querySelectorAll("*"))) {
        const value = node.textContent?.trim();
        if (!value) continue;
        // Nested elements repeat their ancestors' text; keep a fragment only
        // if it neither contains nor is contained in one already kept.
        const overlaps = fragments.some(
          (kept) => kept.includes(value) || value.includes(kept)
        );
        if (!overlaps) fragments.push(value);
      }
      return fragments.join(" ").trim().replace(/\s+/g, " ");
    };
    try {
      for (const element of Array.from(document2.querySelectorAll(".b_algo"))) {
        const titleEl = element.querySelector("h2");
        const href = element.querySelector("h2 a")?.getAttribute("href");
        const snippet = extractSnippet(element);
        if (!href || !isValidUrl(href)) continue;
        const title = titleEl?.textContent || "";
        if (!title) continue;
        links.push({ title, snippet, url: href, content: "" });
      }
    } catch (error) {
      console.error("Error extracting search results from Bing:", error);
      throw error;
    }
    return links;
  }
  /**
   * Waits until the Bing results container (`#b_results`) is present.
   *
   * @param page - The Puppeteer page object
   * @param timeout - Maximum wait in milliseconds (default: 10000)
   * @returns Promise that resolves when the selector is found
   */
  async waitForSearchResults(page, timeout) {
    const waitOptions = { timeout: timeout ?? 1e4 };
    await page.waitForSelector("#b_results", waitOptions);
  }
};
|
|
896
|
-
|
|
897
|
-
// src/libs/browser-search/engines/baidu.ts
|
|
898
|
-
var BaiduSearchEngine = class {
  /**
   * Builds the Baidu results URL for a query.
   *
   * @param query - The search query string
   * @param options - Search configuration options
   * @param options.count - Number of results to request (default: 10)
   * @param options.excludeDomains - Domains to exclude via `-site:` operators
   * @returns The Baidu search URL as a string
   */
  getSearchUrl(query, options) {
    let excludeDomainsQuery = "";
    if (options.excludeDomains && options.excludeDomains.length > 0) {
      excludeDomainsQuery = options.excludeDomains.map((domain) => `-site:${domain}`).join(" ");
    }
    const searchParams = new URLSearchParams();
    searchParams.append("wd", excludeDomainsQuery ? `${excludeDomainsQuery} ${query}` : query);
    // rn is the parameter for result count
    searchParams.append("rn", `${options.count || 10}`);
    return `https://www.baidu.com/s?${searchParams.toString()}`;
  }
  /**
   * Collects title, URL and snippet of every `.result` entry on a Baidu page.
   *
   * BrowserSearch.search hands this method over as a `pageFunction`, so it is
   * kept self-contained. Unlike the Bing extractor, hrefs are not validated
   * and errors are logged but not rethrown.
   *
   * @param window2 - Window object of the loaded Baidu results page
   * @returns Array of results; entries without a title or href are dropped
   */
  extractSearchResults(window2) {
    const document2 = window2.document;
    const links = [];
    try {
      for (const element of Array.from(document2.querySelectorAll(".result"))) {
        const titleEl = element.querySelector(".t a");
        const href = titleEl?.getAttribute("href");
        const snippetEl = element.querySelector(".c-span-last .content-right_2s-H4");
        if (!href) continue;
        const title = titleEl?.textContent || "";
        if (!title) continue;
        links.push({
          title,
          // Note: Baidu uses redirects, we'll need to follow them
          url: href,
          snippet: snippetEl?.textContent || "",
          content: ""
        });
      }
    } catch (error) {
      console.error("Error extracting search results from Baidu:", error);
    }
    return links;
  }
  /**
   * Waits until the Baidu page container (`#page`) is present.
   *
   * @param page - The Puppeteer page object
   * @param timeout - Maximum wait in milliseconds (default: 10000)
   * @returns Promise that resolves when the selector is found
   */
  async waitForSearchResults(page, timeout) {
    const waitOptions = { timeout: timeout ?? 1e4 };
    await page.waitForSelector("#page", waitOptions);
  }
};
|
|
960
|
-
|
|
961
|
-
// src/libs/browser-search/engines/sogou.ts
|
|
962
|
-
var SogouSearchEngine = class {
  /**
   * Generates a Sogou search URL based on the provided query and options.
   *
   * @param query - The search query string
   * @param options - Search configuration options
   * @param options.count - Number of search results to request (default: 10)
   * @param options.excludeDomains - Array of domain names to exclude from search results
   * @returns Formatted Sogou search URL as a string
   */
  getSearchUrl(query, options) {
    const { count = 10, excludeDomains = [] } = options;
    const excludeDomainsQuery = excludeDomains && excludeDomains.length > 0 ? excludeDomains.map((domain) => `-site:${domain}`).join(" ") : "";
    const searchParams = new URLSearchParams({
      query: `${excludeDomainsQuery ? `${excludeDomainsQuery} ` : ""}${query}`,
      num: `${count}`
    });
    return `https://www.sogou.com/web?${searchParams.toString()}`;
  }
  /**
   * !NOTE: This function runs in the context of the browser page, not Node.js,
   * so it must stay self-contained (no `this`, no module-level helpers).
   *
   * Extracts title, URL and snippet of every `.results .vrwrap` entry.
   * Relative result links are resolved against the Sogou origin; entries
   * without a link, without a title, or with a non-http(s) link are dropped.
   *
   * @param window2 - The window object of the loaded Sogou results page
   * @returns Array of search results
   * @throws Rethrows any error raised while reading the page (after logging it)
   */
  extractSearchResults(window2) {
    const links = [];
    const document2 = window2.document;
    const EndPoints = "https://www.sogou.com";
    const SELECTOR = {
      results: ".results .vrwrap",
      resultTitle: ".vr-title",
      resultLink: ".vr-title > a",
      resultSnippet: [".star-wiki", ".fz-mid", ".attribute-centent"],
      resultSnippetExcluded: [".text-lightgray", ".zan-box", ".tag-website"]
    };
    // Turns an href into an absolute http(s) URL string, or null when the
    // href is missing or unusable. Resolving with a base URL (instead of
    // testing for the substring "http" and concatenating) keeps a missing
    // href from becoming "https://www.sogou.comundefined" and keeps relative
    // links that merely contain "http" from being discarded.
    const resolveUrl = (href) => {
      const raw = href?.trim();
      if (!raw) return null;
      try {
        const resolved = new URL(raw, EndPoints);
        if (resolved.protocol !== "http:" && resolved.protocol !== "https:") return null;
        // Absolute links are passed through verbatim, as before.
        return /^https?:\/\//i.test(raw) ? raw : resolved.href;
      } catch {
        return null;
      }
    };
    try {
      const elements = document2.querySelectorAll(SELECTOR.results);
      elements.forEach((element) => {
        const url2 = resolveUrl(element.querySelector(SELECTOR.resultLink)?.getAttribute("href"));
        if (!url2) return;
        const title = element.querySelector(SELECTOR.resultTitle)?.textContent?.trim() || "";
        if (!title) return;
        const snippets = SELECTOR.resultSnippet.map((selector) => {
          // Drop decoration nodes first so their text does not leak into the snippet.
          SELECTOR.resultSnippetExcluded.forEach((excludedSelector) => {
            const el2 = element.querySelector(excludedSelector);
            el2?.remove();
          });
          const el = element.querySelector(selector);
          return el?.textContent?.trim() || "";
        });
        links.push({
          title,
          url: url2,
          snippet: snippets.join(""),
          content: ""
        });
      });
    } catch (error) {
      const msg = error instanceof Error ? error.message : String(error);
      console.error("Error extracting search results from Sogou:", msg);
      throw error;
    }
    return links;
  }
  /**
   * Waits until the Sogou pagination container (`#pagebar_container`) is present.
   *
   * @param page - The Puppeteer page object
   * @param timeout - Maximum wait in milliseconds (default: 10000)
   * @returns Promise that resolves when the selector is found
   */
  async waitForSearchResults(page, timeout) {
    await page.waitForSelector("#pagebar_container", {
      timeout: timeout ?? 1e4
    });
  }
};
|
|
1051
|
-
|
|
1052
|
-
// src/libs/browser-search/engines/google.ts
|
|
1053
|
-
var GoogleSearchEngine = class {
  /**
   * Builds the Google results URL for a query.
   *
   * @param query - The search query string
   * @param options - Search configuration options
   * @param options.count - Number of results to request (default: 10)
   * @param options.excludeDomains - Domains to exclude via `-site:` operators
   * @returns The Google search URL as a string (always carries `udm=14`)
   */
  getSearchUrl(query, options) {
    let exclusions = "";
    if (options.excludeDomains && options.excludeDomains.length > 0) {
      exclusions = `${options.excludeDomains.map((domain) => `-site:${domain}`).join(" ")} `;
    }
    const searchParams = new URLSearchParams();
    searchParams.set("q", `${exclusions}${query}`);
    searchParams.set("num", `${options.count || 10}`);
    searchParams.set("udm", "14");
    return `https://www.google.com/search?${searchParams.toString()}`;
  }
  /**
   * Collects title, URL and snippet of every `.tF2Cxc` result on a Google page.
   *
   * BrowserSearch.search hands this method over as a `pageFunction`, so it is
   * kept self-contained. Errors are logged but not rethrown.
   *
   * @param window2 - Window object of the loaded Google results page
   * @returns Array of results; entries without a title or a valid URL are dropped
   */
  extractSearchResults(window2) {
    const document2 = window2.document;
    const links = [];
    const isValidUrl = (candidate) => {
      try {
        new URL(candidate);
      } catch {
        return false;
      }
      return true;
    };
    // Snippet = de-duplicated text of the node minus heading, cite and
    // script/style nodes. Work on a clone so the page itself is untouched.
    const extractSnippet = (element) => {
      const clone = element.cloneNode(true);
      for (const selector of ["h3", "cite", "script, style"]) {
        clone.querySelectorAll(selector).forEach((el) => el.remove());
      }
      const fragments = [];
      for (const node of Array.from(clone.querySelectorAll("*"))) {
        const value = node.textContent?.trim();
        if (!value) continue;
        // Nested elements repeat their ancestors' text; keep a fragment only
        // if it neither contains nor is contained in one already kept.
        const overlaps = fragments.some(
          (kept) => kept.includes(value) || value.includes(kept)
        );
        if (!overlaps) fragments.push(value);
      }
      return fragments.join(" ").trim().replace(/\s+/g, " ");
    };
    try {
      for (const element of Array.from(document2.querySelectorAll(".tF2Cxc"))) {
        const titleEl = element.querySelector("h3");
        const href = element.querySelector("a")?.getAttribute("href");
        // The snippet is read from the parent container when there is one.
        const snippet = extractSnippet(element.parentElement || element);
        if (!href || !isValidUrl(href)) continue;
        const title = titleEl?.textContent || "";
        if (!title) continue;
        links.push({ title, url: href, snippet, content: "" });
      }
    } catch (error) {
      console.error(error);
    }
    return links;
  }
  /**
   * Waits until the Google results container (`#search`) is present.
   *
   * @param page - The Puppeteer page object
   * @param timeout - Maximum wait in milliseconds (default: 10000)
   * @returns Promise that resolves when the selector is found
   */
  async waitForSearchResults(page, timeout) {
    const waitOptions = { timeout: timeout ?? 1e4 };
    await page.waitForSelector("#search", waitOptions);
  }
};
|
|
1140
|
-
|
|
1141
|
-
// src/libs/browser-search/engines/get.ts
|
|
1142
|
-
/**
 * Creates the search-engine adapter for the given engine name.
 *
 * @param {string} engine - One of "bing", "baidu", "sogou" or "google".
 * @returns A new adapter instance; any other value yields a Bing adapter.
 */
function getSearchEngine(engine) {
  // A Map (not a plain object) so names like "constructor" cannot hit
  // inherited properties; factories keep construction lazy.
  const factories = new Map([
    ["bing", () => new BingSearchEngine()],
    ["baidu", () => new BaiduSearchEngine()],
    ["sogou", () => new SogouSearchEngine()],
    ["google", () => new GoogleSearchEngine()]
  ]);
  const create = factories.get(engine);
  if (create === undefined) {
    return new BingSearchEngine();
  }
  return create();
}
|
|
1156
|
-
|
|
1157
|
-
// src/libs/browser-search/search.ts
|
|
1158
|
-
/**
 * Runs web searches in a browser and optionally visits each hit to extract
 * its content as Markdown.
 */
var BrowserSearch = class {
  /**
   * @param config - Optional settings read by this class: `logger`,
   *   `browser`, `defaultEngine` and `browserOptions`. A LocalBrowser is
   *   created when no browser is supplied; the default engine is "bing".
   */
  constructor(config = {}) {
    this.config = config;
    this.logger = config?.logger ?? import_logger4.defaultLogger;
    this.browser = config.browser ?? new LocalBrowser({ logger: this.logger });
    this.defaultEngine = config.defaultEngine ?? "bing";
  }
  logger;
  browser;
  isBrowserOpen = false;
  defaultEngine;
  /**
   * Search web and extract content from result pages.
   *
   * @param options - `query` (string or array of strings) plus optional
   *   `count`, `engine`, `excludeDomains`, `concurrency`, `truncate`,
   *   `needVisitedUrls` and `keepBrowserOpen`.
   * @returns Flattened list of results for all queries; an empty array when
   *   anything throws (the error is logged, not rethrown).
   */
  async perform(options) {
    this.logger.info("Starting search with options:", options);
    let queries = options.query;
    if (!Array.isArray(queries)) {
      queries = [queries];
    }
    const excludeDomains = options.excludeDomains || [];
    // Split the requested count across the queries, but never below 3 each.
    // A falsy count is passed through untouched.
    let count = options.count;
    if (count) {
      count = Math.max(3, Math.floor(count / queries.length));
    }
    const engine = options.engine || this.defaultEngine;
    try {
      if (this.isBrowserOpen) {
        this.logger.info("Using existing browser instance");
      } else {
        this.logger.info("Launching browser");
        await this.browser.launch(this.config.browserOptions);
        this.isBrowserOpen = true;
      }
      // Queue and visited-URL set are shared by all queries of this call.
      const queue = new PromiseQueue(options.concurrency || 15);
      const visitedUrls = /* @__PURE__ */ new Set();
      const pending = queries.map((query) => {
        const searchOptions = {
          query,
          count,
          queue,
          visitedUrls,
          excludeDomains,
          truncate: options.truncate,
          needVisitedUrls: options.needVisitedUrls,
          engine
        };
        return this.search(this.browser, searchOptions);
      });
      const perQuery = await Promise.all(pending);
      this.logger.success("Search completed successfully");
      return perQuery.flat();
    } catch (error) {
      this.logger.error("Search failed:", error);
      return [];
    } finally {
      if (this.isBrowserOpen && !options.keepBrowserOpen) {
        await this.closeBrowser();
      }
    }
  }
  /**
   * Explicitly close the browser instance (no-op when it is not open).
   */
  async closeBrowser() {
    if (!this.isBrowserOpen) {
      return;
    }
    this.logger.info("Closing browser");
    await this.browser.close();
    this.isBrowserOpen = false;
  }
  /**
   * Runs one query on one engine and post-processes the hits.
   *
   * @param browser - Browser used to load the results page.
   * @param options - Per-query options assembled by `perform`.
   * @returns Results for this query, de-duplicated against `visitedUrls`,
   *   without skipped domains, with `content` truncated when requested.
   */
  async search(browser, options) {
    const searchEngine = getSearchEngine(options.engine);
    const url2 = searchEngine.getSearchUrl(options.query, {
      count: options.count,
      excludeDomains: options.excludeDomains
    });
    this.logger.info(`Searching with ${options.engine} engine: ${url2}`);
    const fetched = await browser.evaluateOnNewPage({
      url: url2,
      waitForOptions: {
        waitUntil: "networkidle0"
      },
      pageFunction: searchEngine.extractSearchResults,
      pageFunctionParams: [],
      beforePageLoad: async (page) => {
        await interceptRequest(page);
      },
      afterPageLoad: async (page) => {
        if (searchEngine.waitForSearchResults)
          await searchEngine.waitForSearchResults(page, 1e4);
      }
    });
    this.logger.info(`Fetched ${fetched?.length ?? 0} links`);
    let links = [];
    if (fetched != null) {
      links = fetched.filter((link) => {
        if (options.visitedUrls.has(link.url)) return false;
        // Recorded before the domain check, so skipped URLs count as seen too.
        options.visitedUrls.add(link.url);
        return !shouldSkipDomain(link.url);
      });
    }
    if (!links.length) {
      this.logger.info("No valid links found");
      return [];
    }
    // With needVisitedUrls each hit is visited through the shared queue;
    // otherwise the plain SERP entries are settled as-is.
    let work = links;
    if (options.needVisitedUrls) {
      work = links.map(
        (item) => options.queue.add(() => this.visitLink(this.browser, item))
      );
    }
    const outcomes = await Promise.allSettled(work);
    return outcomes.flatMap((outcome) => {
      if (outcome.status === "rejected" || !outcome.value) return [];
      const { value } = outcome;
      const content = options.truncate ? value.content.slice(0, options.truncate) : value.content;
      return [{ ...value, content }];
    });
  }
  /**
   * Loads one hit and converts its extracted content to Markdown.
   *
   * @param browser - Browser used to load the page.
   * @param item - Search hit; its `url` and `snippet` are carried over.
   * @returns The enriched result, or undefined when the page yields nothing
   *   or loading fails (the failure is logged).
   */
  async visitLink(browser, item) {
    try {
      this.logger.info("Visiting link:", item.url);
      const extracted = await browser.evaluateOnNewPage({
        url: item.url,
        pageFunction: extractPageInformation,
        pageFunctionParams: [READABILITY_SCRIPT],
        beforePageLoad: async (page) => {
          await interceptRequest(page);
        }
      });
      if (!extracted) {
        return void 0;
      }
      const content = toMarkdown(extracted.content);
      return { ...extracted, url: item.url, content, snippet: item.snippet };
    } catch (e) {
      this.logger.error("Failed to visit link:", e);
    }
  }
};
|
|
1288
|
-
|
|
1289
|
-
// src/search/local.ts
|
|
1290
|
-
var import_logger5 = require("@agent-infra/logger");
|
|
1291
|
-
var logger2 = new import_logger5.ConsoleLogger("[LocalSearch]");
|
|
1292
|
-
/**
 * Searches with a local headless browser, trying the requested engines in
 * order and returning the hits of the first engine that yields any.
 *
 * @param options - Search options.
 * @param options.query - Query forwarded to BrowserSearch.perform.
 * @param options.limit - Result count forwarded as `count` (default: 10).
 * @param options.engines - Comma-separated engine names, or "all" for
 *   "bing,google,baidu,sogou" (default: "all"). Whitespace around names and
 *   empty entries are ignored.
 * @returns {Promise<{results: Array, success: boolean}>} `results` is empty
 *   when no engine produced hits; `success` is always true on return.
 * @throws {Error} "engines is required" when no engine name remains.
 */
async function localSearch(options) {
  const { query, limit = 10 } = options;
  let { engines = "all" } = options;
  const browserSearch = new BrowserSearch({
    logger: logger2,
    browserOptions: {
      headless: true
    }
  });
  if (engines === "all") {
    engines = "bing,google,baidu,sogou";
  }
  try {
    // split() alone never returns an empty array ("" -> [""]), so names are
    // trimmed and empty entries dropped to make the guard below effective and
    // to keep " google" from silently falling back to the default engine.
    const engineList = engines.split(",").map((name) => name.trim()).filter(Boolean);
    if (engineList.length === 0) {
      throw new Error("engines is required");
    }
    const results = [];
    for (const engine of engineList) {
      // Sequential on purpose: later engines are only fallbacks.
      const res = await browserSearch.perform({
        query,
        count: limit,
        engine,
        needVisitedUrls: false
      });
      if (res.length > 0) {
        results.push(...res);
        break;
      }
    }
    logger2.info(`Found ${results.length} results for ${query}`, results);
    return {
      results,
      success: true
    };
  } finally {
    await browserSearch.closeBrowser();
  }
}
|
|
1331
|
-
|
|
1332
|
-
// src/tools.ts
|
|
1333
|
-
// Descriptor of the `one_search` tool: its name, a human-readable
// description, and a JSON-Schema-style definition of the accepted input.
// Only `query` is required.
var SEARCH_TOOL = {
  name: "one_search",
  description: "Search and retrieve content from web pages. Returns SERP results by default (url, title, description).",
  inputSchema: {
    type: "object",
    properties: {
      query: { type: "string", description: "Search query string" },
      limit: { type: "number", description: "Maximum number of results to return (default: 10)" },
      language: { type: "string", description: "Language code for search results (default: auto)" },
      categories: {
        type: "string",
        enum: ["general", "news", "images", "videos", "it", "science", "map", "music", "files", "social_media"],
        description: "Categories to search for (default: general)"
      },
      timeRange: {
        type: "string",
        description: "Time range for search results (default: all)",
        enum: ["all", "day", "week", "month", "year"]
      }
    },
    required: ["query"]
  }
};
|
|
1382
|
-
// Descriptor of the `one_map` tool: its name, a human-readable description,
// and a JSON-Schema-style definition of the accepted input. Only `url` is
// required; the boolean flags select between sitemap and HTML-link discovery.
var MAP_TOOL = {
  name: "one_map",
  description: "Discover URLs from a starting point. Can use both sitemap.xml and HTML link discovery.",
  inputSchema: {
    type: "object",
    properties: {
      url: { type: "string", description: "Starting URL for URL discovery" },
      search: { type: "string", description: "Optional search term to filter URLs" },
      ignoreSitemap: { type: "boolean", description: "Skip sitemap.xml discovery and only use HTML links" },
      sitemapOnly: { type: "boolean", description: "Only use sitemap.xml for discovery, ignore HTML links" },
      includeSubdomains: { type: "boolean", description: "Include URLs from subdomains in results" },
      limit: { type: "number", description: "Maximum number of URLs to return" }
    },
    required: ["url"]
  }
};
|
|
1416
|
-
/**
 * Tool descriptor for `one_scrape`: its name, description and the JSON schema
 * of the arguments it accepts. Only `url` is required; every other property
 * is an optional scraping setting.
 */
var SCRAPE_TOOL = (() => {
  // Builders for the repetitive schema entries; key order matches the literal form.
  const text = (description) => ({ type: "string", description });
  const flag = (description) => ({ type: "boolean", description });
  const amount = (description) => ({ type: "number", description });
  const textList = (description) => ({ type: "array", items: { type: "string" }, description });
  const choice = (options, description) => ({ type: "string", enum: options, description });

  // Schema of one entry of the `actions` array; `type` is the only required field.
  const actionSchema = {
    type: "object",
    properties: {
      type: choice(
        ["wait", "click", "screenshot", "write", "press", "scroll", "scrape", "executeJavascript"],
        "Type of action to perform"
      ),
      selector: text("CSS selector for the target element"),
      milliseconds: amount("Time to wait in milliseconds (for wait action)"),
      text: text("Text to write (for write action)"),
      key: text("Key to press (for press action)"),
      direction: choice(["up", "down"], "Scroll direction"),
      script: text("JavaScript code to execute"),
      fullPage: flag("Take full page screenshot")
    },
    required: ["type"]
  };

  const extractSchema = {
    type: "object",
    properties: {
      schema: { type: "object", description: "Schema for structured data extraction" },
      systemPrompt: text("System prompt for LLM extraction"),
      prompt: text("User prompt for LLM extraction")
    },
    description: "Configuration for structured data extraction"
  };

  const locationSchema = {
    type: "object",
    properties: {
      country: text("Country code for geolocation"),
      languages: textList("Language codes for content")
    },
    description: "Location settings for scraping"
  };

  const properties = {
    url: text("The URL to scrape"),
    formats: {
      type: "array",
      items: {
        type: "string",
        enum: ["markdown", "html", "rawHtml", "screenshot", "links", "screenshot@fullPage", "extract"]
      },
      description: "Content formats to extract (default: ['markdown'])"
    },
    onlyMainContent: flag("Extract only the main content, filtering out navigation, footers, etc."),
    includeTags: textList("HTML tags to specifically include in extraction"),
    excludeTags: textList("HTML tags to exclude from extraction"),
    waitFor: amount("Time in milliseconds to wait for dynamic content to load"),
    timeout: amount("Maximum time in milliseconds to wait for the page to load"),
    actions: {
      type: "array",
      items: actionSchema,
      description: "List of actions to perform before scraping"
    },
    extract: extractSchema,
    mobile: flag("Use mobile viewport"),
    skipTlsVerification: flag("Skip TLS certificate verification"),
    removeBase64Images: flag("Remove base64 encoded images from output"),
    location: locationSchema
  };

  return {
    name: "one_scrape",
    description: "Scrape a single webpage with advanced options for content extraction. Supports various formats including markdown, HTML, and screenshots. Can execute custom actions like clicking or scrolling before scraping.",
    inputSchema: {
      type: "object",
      properties,
      required: ["url"]
    }
  };
})();
|
|
1566
|
-
/**
 * Tool descriptor for `one_extract`: its name, description and the JSON schema
 * of the arguments it accepts. Only `urls` is required.
 */
var EXTRACT_TOOL = (() => {
  // Builders for the repetitive `{ type, description }` schema entries.
  const text = (description) => ({ type: "string", description });
  const flag = (description) => ({ type: "boolean", description });

  const properties = {
    urls: {
      type: "array",
      items: { type: "string" },
      description: "List of URLs to extract information from"
    },
    prompt: text("Prompt for the LLM extraction"),
    systemPrompt: text("System prompt for LLM extraction"),
    schema: { type: "object", description: "JSON schema for structured data extraction" },
    allowExternalLinks: flag("Allow extraction from external links"),
    enableWebSearch: flag("Enable web search for additional context"),
    includeSubdomains: flag("Include subdomains in extraction")
  };

  return {
    name: "one_extract",
    description: "Extract structured information from web pages using LLM. Supports both cloud AI and self-hosted LLM extraction.",
    inputSchema: {
      type: "object",
      properties,
      required: ["urls"]
    }
  };
})();
|
|
1605
|
-
|
|
1606
|
-
// src/index.ts
|
|
1607
|
-
var import_firecrawl_js = __toESM(require("@mendable/firecrawl-js"), 1);
|
|
1608
|
-
var import_dotenvx = __toESM(require("@dotenvx/dotenvx"), 1);
|
|
1609
|
-
var import_duck_duck_scrape = require("duck-duck-scrape");
|
|
1610
|
-
// Run dotenvx's config() before any environment variable is read
// (presumably this loads a local .env file into process.env - confirm against dotenvx docs).
import_dotenvx.default.config();

// Environment-driven settings. The defaults below apply only when the variable
// is unset; values that are set stay raw strings, exactly as the environment
// provides them.
var {
  SEARCH_API_URL,
  SEARCH_API_KEY,
  SEARCH_PROVIDER = "searxng",
  SAFE_SEARCH = 0,
  LIMIT = 10,
  CATEGORIES = "general",
  ENGINES = "all",
  FORMAT = "json",
  LANGUAGE = "auto",
  TIME_RANGE = "",
  TIMEOUT: DEFAULT_TIMEOUT = 1e4,
  FIRECRAWL_API_KEY,
  FIRECRAWL_API_URL
} = process.env;

// Firecrawl client. `apiUrl` is only passed when FIRECRAWL_API_URL is non-empty,
// and a missing API key is passed as an empty string.
var firecrawl = new import_firecrawl_js.default(
  Object.assign(
    { apiKey: FIRECRAWL_API_KEY ?? "" },
    FIRECRAWL_API_URL ? { apiUrl: FIRECRAWL_API_URL } : {}
  )
);
|
|
1628
|
-
// MCP server instance: identifies itself as "one-search-mcp" and declares the
// `tools` and `logging` capabilities (both with empty option objects).
var server = new import_server.Server(
  { name: "one-search-mcp", version: "0.0.1" },
  { capabilities: { tools: {}, logging: {} } }
);
|
|
1640
|
-
/**
 * Default search options built from the environment-derived constants.
 * Every search backend receives these first and caller arguments are spread
 * on top, so callers always win.
 *
 * The backends read the camelCase names `safeSearch` and `timeRange`, while
 * this object historically only carried `safesearch` / `time_range`; as a
 * result SAFE_SEARCH and TIME_RANGE never reached any backend. Both spellings
 * are provided now; the legacy keys are kept for backward compatibility.
 */
var searchDefaultConfig = {
  limit: Number(LIMIT),
  categories: CATEGORIES,
  format: FORMAT,
  safesearch: SAFE_SEARCH,
  safeSearch: SAFE_SEARCH,
  language: LANGUAGE,
  engines: ENGINES,
  time_range: TIME_RANGE,
  // An unset/empty TIME_RANGE is passed as `undefined` so each backend falls
  // back to its own default instead of receiving an empty string.
  timeRange: TIME_RANGE || undefined,
  timeout: DEFAULT_TIMEOUT
};
|
|
1650
|
-
// Answers tool-list requests with the four tool descriptors, in a fixed order.
server.setRequestHandler(import_types.ListToolsRequestSchema, async () => {
  const tools = [SEARCH_TOOL, EXTRACT_TOOL, SCRAPE_TOOL, MAP_TOOL];
  return { tools };
});
|
|
1658
|
-
/**
 * Handles tool-call requests: validates the arguments, dispatches to the
 * matching `process*` helper and converts the outcome into a tool result.
 *
 * Failures never reject: they are returned as a result whose single text
 * content item carries the error message. Failure results set `isError: true`
 * (the MCP flag for a failed tool call) in addition to the pre-existing
 * `success: false` field.
 *
 * NOTE(review): EXTRACT_TOOL is advertised by the ListTools handler, but there
 * is no `one_extract` case below, so such calls end in "Unknown tool".
 */
server.setRequestHandler(import_types.CallToolRequestSchema, async (request) => {
  const startTime = Date.now();

  // Timestamp prefix shared by every log line emitted from this handler.
  const stamp = () => `[${new Date().toISOString()}]`;

  // Builds the failure payload returned to the client.
  const failure = (text) => ({
    success: false,
    isError: true,
    content: [{ type: "text", text }]
  });

  try {
    const { name, arguments: args } = request.params;
    if (!args) {
      throw new Error("No arguments provided");
    }
    server.sendLoggingMessage({
      level: "info",
      data: `${stamp()} Received request for tool: [${name}]`
    });

    switch (name) {
      case "one_search": {
        if (!checkSearchArgs(args)) {
          throw new Error(`Invalid arguments for tool: [${name}]`);
        }
        try {
          const { results, success } = await processSearch({
            ...args,
            apiKey: SEARCH_API_KEY ?? "",
            apiUrl: SEARCH_API_URL
          });
          if (!success) {
            throw new Error("Failed to search");
          }
          // One text section per hit; the trailing line is empty when the hit has no markdown.
          const resultsText = results.map((result) => [
            `Title: ${result.title}`,
            `URL: ${result.url}`,
            `Description: ${result.snippet}`,
            result.markdown ? `Content: ${result.markdown}` : ""
          ].join("\n"));
          return {
            content: [{ type: "text", text: resultsText.join("\n\n") }],
            results,
            success
          };
        } catch (error) {
          server.sendLoggingMessage({
            level: "error",
            data: `${stamp()} Error searching: ${error}`
          });
          return failure(error instanceof Error ? error.message : "Unknown error");
        }
      }

      case "one_scrape": {
        if (!checkScrapeArgs(args)) {
          throw new Error(`Invalid arguments for tool: [${name}]`);
        }
        try {
          const scrapeStart = Date.now();
          server.sendLoggingMessage({
            level: "info",
            data: `${stamp()} Scraping started for url: [${args.url}]`
          });
          const { url: targetUrl, ...scrapeArgs } = args;
          const { content, success, result } = await processScrape(targetUrl, scrapeArgs);
          server.sendLoggingMessage({
            level: "info",
            data: `${stamp()} Scraping completed in ${Date.now() - scrapeStart}ms`
          });
          return { content, result, success };
        } catch (error) {
          server.sendLoggingMessage({
            level: "error",
            data: `${stamp()} Error scraping: ${error}`
          });
          return failure(error instanceof Error ? error.message : "Unknown error");
        }
      }

      case "one_map": {
        if (!checkMapArgs(args)) {
          throw new Error(`Invalid arguments for tool: [${name}]`);
        }
        try {
          const { content, success, result } = await processMapUrl(args.url, args);
          return { content, result, success };
        } catch (error) {
          server.sendLoggingMessage({
            level: "error",
            data: `${stamp()} Error mapping: ${error}`
          });
          return failure(error instanceof Error ? error.message : String(error));
        }
      }

      default: {
        throw new Error(`Unknown tool: ${name}`);
      }
    }
  } catch (error) {
    // Reached for missing/invalid arguments and unknown tool names.
    const msg = error instanceof Error ? error.message : String(error);
    server.sendLoggingMessage({
      level: "error",
      data: {
        message: `${stamp()} Error processing request: ${msg}`,
        tool: request.params.name,
        arguments: request.params.arguments,
        timestamp: new Date().toISOString(),
        duration: Date.now() - startTime
      }
    });
    return failure(msg);
  } finally {
    server.sendLoggingMessage({
      level: "info",
      data: `${stamp()} Request completed in ${Date.now() - startTime}ms`
    });
  }
});
|
|
1812
|
-
/**
 * Runs a search with the backend selected by SEARCH_PROVIDER.
 *
 * For every backend the options are `searchDefaultConfig` overlaid with the
 * caller's `args`, so caller-supplied values take precedence over defaults.
 *
 * @param {object} args - Search arguments from the tool call (at least `query`).
 * @returns {Promise<object>} Whatever the selected backend resolves to.
 * @throws {Error} When SEARCH_PROVIDER names an unsupported backend, or when
 *   the backend itself rejects.
 */
async function processSearch(args) {
  switch (SEARCH_PROVIDER) {
    case "searxng": {
      // The defaults are already applied by the spread. Previously `categories`
      // and `language` were re-assigned from searchDefaultConfig afterwards,
      // which discarded the values the caller asked for.
      return await searxngSearch({
        ...searchDefaultConfig,
        ...args,
        apiKey: SEARCH_API_KEY
      });
    }
    case "tavily": {
      return await tavilySearch({
        ...searchDefaultConfig,
        ...args,
        apiKey: SEARCH_API_KEY
      });
    }
    case "bing": {
      return await bingSearch({
        ...searchDefaultConfig,
        ...args,
        apiKey: SEARCH_API_KEY
      });
    }
    case "duckduckgo": {
      const { SafeSearchType } = import_duck_duck_scrape;
      // Indexed by the numeric safe-search level, using the same convention as
      // the Bing backend (0 = off, 1 = moderate, 2 = strict). The table used to
      // be reversed, which made the default level 0 mean STRICT.
      const safeSearchLevels = [SafeSearchType.OFF, SafeSearchType.MODERATE, SafeSearchType.STRICT];
      const level = args.safeSearch ?? 0;
      return await duckDuckGoSearch({
        ...searchDefaultConfig,
        ...args,
        apiKey: SEARCH_API_KEY,
        safeSearch: safeSearchLevels[level]
      });
    }
    case "local": {
      return await localSearch({
        ...searchDefaultConfig,
        ...args
      });
    }
    default:
      throw new Error(`Unsupported search provider: ${SEARCH_PROVIDER}`);
  }
}
|
|
1863
|
-
/**
 * Scrapes one URL through the Firecrawl client and flattens the response into
 * a single text content item.
 *
 * The text is built from whichever of these response fields are present, in
 * this order: markdown, rawHtml, links (one per line), screenshot, html,
 * extract. Sections are separated by a blank line.
 *
 * @param {string} url2 - URL to scrape.
 * @param {object} args - Scrape options forwarded to `firecrawl.scrapeUrl`.
 * @returns {Promise<{content: Array<{type: string, text: string}>, result: object, success: boolean}>}
 * @throws {Error} When the Firecrawl response reports `success` as falsy.
 */
async function processScrape(url2, args) {
  const res = await firecrawl.scrapeUrl(url2, { ...args });
  if (!res.success) {
    throw new Error(`Failed to scrape: ${res.error}`);
  }

  const sections = [];
  if (res.markdown) {
    sections.push(res.markdown);
  }
  if (res.rawHtml) {
    sections.push(res.rawHtml);
  }
  if (res.links) {
    sections.push(res.links.join("\n"));
  }
  if (res.screenshot) {
    sections.push(res.screenshot);
  }
  if (res.html) {
    sections.push(res.html);
  }
  if (res.extract) {
    // A non-string extract would be joined as "[object Object]", so serialize it.
    sections.push(
      typeof res.extract === "string" ? res.extract : JSON.stringify(res.extract, null, 2)
    );
  }

  return {
    content: [
      {
        type: "text",
        text: sections.join("\n\n") || "No content found"
      }
    ],
    result: res,
    success: true
  };
}
|
|
1900
|
-
/**
 * Discovers URLs for a starting point through the Firecrawl client.
 *
 * @param {string} url2 - Starting URL.
 * @param {object} args - Options forwarded to `firecrawl.mapUrl`.
 * @returns {Promise<{content: Array<{type: string, text: string}>, result: string[], success: boolean}>}
 *   The links as newline-separated text plus the raw link list.
 * @throws {Error} When the response carries an `error` key or has no `links`.
 */
async function processMapUrl(url2, args) {
  const mapped = await firecrawl.mapUrl(url2, { ...args });

  if ("error" in mapped) {
    throw new Error(`Failed to map: ${mapped.error}`);
  }

  const links = mapped.links;
  if (!links) {
    throw new Error(`No links found from: ${url2}`);
  }

  const text = links.join("\n").trim();
  return {
    content: [{ type: "text", text }],
    result: links,
    success: true
  };
}
|
|
1921
|
-
/**
 * Tells whether `args` is a non-null object with a string `query` property.
 *
 * @param {unknown} args - Raw tool arguments.
 * @returns {boolean}
 */
function checkSearchArgs(args) {
  if (typeof args !== "object" || args === null) {
    return false;
  }
  if (!("query" in args)) {
    return false;
  }
  return typeof args.query === "string";
}
|
|
1924
|
-
/**
 * Tells whether `args` is a non-null object with a string `url` property.
 *
 * @param {unknown} args - Raw tool arguments.
 * @returns {boolean}
 */
function checkScrapeArgs(args) {
  if (typeof args !== "object" || args === null) {
    return false;
  }
  if (!("url" in args)) {
    return false;
  }
  return typeof args.url === "string";
}
|
|
1927
|
-
/**
 * Tells whether `args` is a non-null object with a string `url` property.
 *
 * @param {unknown} args - Raw tool arguments.
 * @returns {boolean}
 */
function checkMapArgs(args) {
  const isObject = typeof args === "object" && args !== null;
  if (!isObject) {
    return false;
  }
  return "url" in args && typeof args.url === "string";
}
|
|
1930
|
-
/**
 * Connects the MCP server to a stdio transport and announces that it started.
 *
 * Diagnostics go to stderr only: the stdio transport uses stdout as its
 * protocol channel, so the startup banner that used to be written to stdout
 * would be mixed into the protocol stream.
 *
 * On any startup failure the error message is written to stderr and the
 * process exits with status 1.
 *
 * @returns {Promise<void>}
 */
async function runServer() {
  try {
    process.stderr.write("Starting OneSearch MCP server...\n");
    const transport = new import_stdio.StdioServerTransport();
    await server.connect(transport);
    server.sendLoggingMessage({
      level: "info",
      data: "OneSearch MCP server started"
    });
  } catch (error) {
    const msg = error instanceof Error ? error.message : String(error);
    process.stderr.write(`Error starting server: ${msg}\n`);
    process.exit(1);
  }
}
|
|
1946
|
-
// Entry point: start the server; if the returned promise rejects, report the
// reason on stderr and exit with status 1.
runServer().catch((error) => {
  const reason = error instanceof Error ? error.message : String(error);
  process.stderr.write(`Error running server: ${reason}\n`);
  process.exit(1);
});
|
|
2
|
+
"use strict";var Ae=Object.create;var U=Object.defineProperty;var xe=Object.getOwnPropertyDescriptor;var Le=Object.getOwnPropertyNames;var Pe=Object.getPrototypeOf,Ce=Object.prototype.hasOwnProperty;var q=(i,t,e,r)=>{if(t&&typeof t=="object"||typeof t=="function")for(let n of Le(t))!Ce.call(i,n)&&n!==e&&U(i,n,{get:()=>t[n],enumerable:!(r=xe(t,n))||r.enumerable});return i};var m=(i,t,e)=>(e=i!=null?Ae(Pe(i)):{},q(t||!i||!i.__esModule?U(e,"default",{value:i,enumerable:!0}):e,i)),Re=i=>q(U({},"__esModule",{value:!0}),i);var Ze={};module.exports=Re(Ze);var be=require("@modelcontextprotocol/sdk/server/index.js"),D=require("@modelcontextprotocol/sdk/types.js"),we=require("@modelcontextprotocol/sdk/server/stdio.js");async function H(i){let{query:t,limit:e=10,safeSearch:r=0,page:n=1,apiUrl:a="https://api.bing.microsoft.com/v7.0/search",apiKey:o,language:s}=i,u=["Off","Moderate","Strict"];if(!o)throw new Error("Bing API key is required");let h={q:t,count:e,offset:(n-1)*e,mkt:s,safeSearch:u[r]};try{let c=new URLSearchParams;Object.entries(h).forEach(([d,_])=>{_!==void 0&&c.set(d,_.toString())});let g=await fetch(`${a}?${c}`,{method:"GET",headers:{"Content-Type":"application/json","Ocp-Apim-Subscription-Key":o}});if(!g.ok)throw new Error(`Bing search error: ${g.status} ${g.statusText}`);return{results:(await g.json()).webPages?.value?.map(d=>({title:d.name,snippet:d.snippet,url:d.url,source:d.siteName,thumbnailUrl:d.thumbnailUrl,language:d.language,image:null,video:null,engine:"bing"}))??[],success:!0}}catch(c){let g=c instanceof Error?c.message:"Bing search error.";throw process.stdout.write(g),c}}var N=m(require("duck-duck-scrape"),1),j=m(require("async-retry"),1);async function 
W(i){try{let{query:t,timeout:e=1e4,safeSearch:r=N.SafeSearchType.OFF,retry:n={retries:3},...a}=i,o=await(0,j.default)(()=>N.search(t,{...a,safeSearch:r},{response_timeout:e}),n);return{results:(o?{noResults:o.noResults,vqd:o.vqd,results:o.results}:{noResults:!0,vqd:"",results:[]}).results.map(u=>({title:u.title,snippet:u.description,url:u.url,source:u.hostname,image:null,video:null,engine:"duckduckgo"})),success:!0}}catch(t){let e=t instanceof Error?t.message:"DuckDuckGo search error.";throw process.stdout.write(e),t}}var V=m(require("url"),1);async function X(i){try{let{query:t,page:e=1,limit:r=10,categories:n="general",engines:a="all",safeSearch:o=0,format:s="json",language:u="auto",timeRange:h="",timeout:c=1e4,apiKey:g,apiUrl:l}=i;if(!l)throw new Error("SearxNG API URL is required");let p=new AbortController,b=setTimeout(()=>p.abort(),Number(c)),d={q:t,pageno:e,categories:n,format:s,safesearch:o,language:u,engines:a,time_range:h},_=`${l}/search`,B=V.default.format({query:d}),k={"Content-Type":"application/json"};g&&(k.Authorization=`Bearer ${g}`);let ve=await fetch(`${_}${B}`,{method:"POST",headers:k,signal:p.signal});clearTimeout(b);let G=await ve.json();return G.results?{results:G.results.slice(0,r).map(y=>{let Te=y.img_src?{thumbnail:y.thumbnail_src,src:y.img_src}:null,Ne=y.iframe_src?{thumbnail:y.thumbnail_src,src:y.iframe_src}:null;return{title:y.title,snippet:y.content,url:y.url,source:y.source,image:Te,video:Ne,engine:y.engine}}),success:!0}:{results:[],success:!1}}catch(t){let e=t instanceof Error?t.message:"Searxng search error.";throw process.stdout.write(e),t}}var K=require("@tavily/core");async function Y(i){let{query:t,limit:e=10,categories:r="general",timeRange:n,apiKey:a}=i;if(!a)throw new Error("Tavily API key is required");try{let o=(0,K.tavily)({apiKey:a}),s={topic:r,timeRange:n,maxResults:e};return{results:(await o.search(t,s)).results.map(c=>({title:c.title,url:c.url,snippet:c.content,engine:"tavily"})),success:!0}}catch(o){let s=o instanceof 
Error?o.message:"Tavily search error.";throw process.stdout.write(s),o}}var Oe=require("puppeteer-core");var S=m(require("fs"),1),M=m(require("path"),1),z=m(require("os"),1),J=require("@agent-infra/logger"),A=class{logger;constructor(t){this.logger=t??J.defaultLogger}get browsers(){let t=z.homedir(),e=process.env.LOCALAPPDATA;return[{name:"Chromium",executable:{win32:"C:\\Program Files\\Chromium\\Application\\chrome.exe",darwin:"/Applications/Chromium.app/Contents/MacOS/Chromium",linux:"/usr/bin/chromium"},userDataDir:{win32:`${e}\\Chromium\\User Data`,darwin:`${t}/Library/Application Support/Chromium`,linux:`${t}/.config/chromium`}},{name:"Google Chrome",executable:{win32:"C:\\Program Files\\Google\\Chrome\\Application\\chrome.exe",darwin:"/Applications/Google Chrome.app/Contents/MacOS/Google Chrome",linux:"/usr/bin/google-chrome"},userDataDir:{win32:`${e}\\Google\\Chrome\\User Data`,darwin:`${t}/Library/Application Support/Google/Chrome`,linux:`${t}/.config/google-chrome`}},{name:"Google Chrome Canary",executable:{win32:"C:\\Program Files\\Google\\Chrome Canary\\Application\\chrome.exe",darwin:"/Applications/Google Chrome Canary.app/Contents/MacOS/Google Chrome Canary",linux:"/usr/bin/google-chrome-canary"},userDataDir:{win32:`${e}\\Google\\Chrome Canary\\User Data`,darwin:`${t}/Library/Application Support/Google/Chrome Canary`,linux:`${t}/.config/google-chrome-canary`}}]}findBrowser(t){let e=process.platform;if(this.logger.info("Finding browser on platform:",e),e!=="darwin"&&e!=="win32"&&e!=="linux"){let a=new Error(`Unsupported platform: ${e}`);throw this.logger.error(a.message),a}let r=t?this.browsers.find(a=>a.name===t&&S.existsSync(a.executable[e])):this.browsers.find(a=>S.existsSync(a.executable[e]));if(this.logger.log("browser",r),!r){let a=t?new Error(`Cannot find browser: ${t}`):new Error("Cannot find a supported browser on your system. 
Please install Chrome, Edge, or Brave.");throw this.logger.error(a.message),a}let n={executable:r.executable[e],userDataDir:r.userDataDir[e]};return this.logger.success(`Found browser: ${r.name}`),this.logger.info("Browser details:",n),n}getBrowserProfiles(t){let e=this.findBrowser(t);try{let n=JSON.parse(S.readFileSync(M.join(e.userDataDir,"Local State"),"utf8")).profile.info_cache;return Object.entries(n).map(([a,o])=>({displayName:o.name,path:M.join(e.userDataDir,a)}))}catch{return[]}}findChrome(){try{let{executable:t}=this.findBrowser("Google Chrome");return t}catch{return null}}};var Q=require("@agent-infra/logger"),E=class{browser=null;logger;activePage=null;constructor(t){this.logger=t?.logger??Q.defaultLogger,this.logger.info("Browser Options:",t)}getBrowser(){if(!this.browser)throw new Error("Browser not launched");return this.browser}async setupPageListener(){this.browser&&this.browser.on("targetcreated",async t=>{let e=await t.page();e&&(this.logger.info("New page created:",await e.url()),this.activePage=e,e.once("close",()=>{this.activePage===e&&(this.activePage=null)}),e.once("error",()=>{this.activePage===e&&(this.activePage=null)}))})}async close(){this.logger.info("Closing browser");try{await this.browser?.close(),this.browser=null,this.logger.success("Browser closed successfully")}catch(t){throw this.logger.error("Failed to close browser:",t),t}}async evaluateOnNewPage(t){let{url:e,pageFunction:r,pageFunctionParams:n,beforePageLoad:a,afterPageLoad:o,beforeSendResult:s,waitForOptions:u}=t,h=await this.browser.newPage();try{await a?.(h),await h.goto(e,{waitUntil:"networkidle2",...u}),await o?.(h);let c=await h.evaluateHandle(()=>window),g=await h.evaluate(r,c,...n);return await s?.(h,g),await c.dispose(),await h.close(),g}catch(c){throw await h.close(),c}}async createPage(){if(!this.browser)throw this.logger.error("No active browser"),new Error("Browser not launched");return await this.browser.newPage()}async getActivePage(){if(!this.browser)throw 
new Error("Browser not launched");if(this.activePage)try{return await this.activePage.evaluate(()=>document.readyState),this.activePage}catch(e){this.logger.warn("Active page no longer available:",e),this.activePage=null}let t=await this.browser.pages();if(t.length===0)return this.activePage=await this.createPage(),this.activePage;for(let e=t.length-1;e>=0;e--){let r=t[e];try{return await r.evaluate(()=>document.readyState),this.activePage=r,r}catch{continue}}throw new Error("No active page found")}};var Z=m(require("puppeteer-core"),1);var x=class extends E{browserFinder=new A;async launch(t={}){this.logger.info("Launching browser with options:",t);let e=t?.executablePath||this.browserFinder.findBrowser().executable;this.logger.info("Using executable path:",e);let r=t?.defaultViewport?.width??1280,n=t?.defaultViewport?.height??800,a={executablePath:e,headless:t?.headless??!1,defaultViewport:{width:r,height:n},args:["--no-sandbox","--mute-audio","--disable-gpu","--disable-http2","--disable-blink-features=AutomationControlled","--disable-infobars","--disable-background-timer-throttling","--disable-popup-blocking","--disable-backgrounding-occluded-windows","--disable-renderer-backgrounding","--disable-window-activation","--disable-focus-on-load","--no-default-browser-check","--disable-web-security","--disable-features=IsolateOrigins,site-per-process","--disable-site-isolation-trials",`--window-size=${r},${n+90}`,t?.proxy?`--proxy-server=${t.proxy}`:"",t?.profilePath?`--profile-directory=${t.profilePath}`:""].filter(Boolean),ignoreDefaultArgs:["--enable-automation"],timeout:t.timeout??0,downloadBehavior:{policy:"deny"}};this.logger.info("Launch options:",a);try{this.browser=await Z.launch(a),await this.setupPageListener(),this.logger.success("Browser launched successfully")}catch(o){throw this.logger.error("Failed to launch browser:",o),o}}};var Ie=m(require("puppeteer-core"),1);var ee='function q(t,e){if(e&&e.documentElement)t=e,e=arguments[2];else 
if(!t||!t.documentElement)throw new Error("First argument to Readability constructor should be a document object.");if(e=e||{},this._doc=t,this._docJSDOMParser=this._doc.firstChild.__JSDOMParser__,this._articleTitle=null,this._articleByline=null,this._articleDir=null,this._articleSiteName=null,this._attempts=[],this._debug=!!e.debug,this._maxElemsToParse=e.maxElemsToParse||this.DEFAULT_MAX_ELEMS_TO_PARSE,this._nbTopCandidates=e.nbTopCandidates||this.DEFAULT_N_TOP_CANDIDATES,this._charThreshold=e.charThreshold||this.DEFAULT_CHAR_THRESHOLD,this._classesToPreserve=this.CLASSES_TO_PRESERVE.concat(e.classesToPreserve||[]),this._keepClasses=!!e.keepClasses,this._serializer=e.serializer||function(i){return i.innerHTML},this._disableJSONLD=!!e.disableJSONLD,this._allowedVideoRegex=e.allowedVideoRegex||this.REGEXPS.videos,this._flags=this.FLAG_STRIP_UNLIKELYS|this.FLAG_WEIGHT_CLASSES|this.FLAG_CLEAN_CONDITIONALLY,this._debug){let i=function(r){if(r.nodeType==r.TEXT_NODE)return`${r.nodeName} ("${r.textContent}")`;let l=Array.from(r.attributes||[],function(a){return`${a.name}="${a.value}"`}).join(" ");return`<${r.localName} ${l}>`};this.log=function(){if(typeof console!="undefined"){let l=Array.from(arguments,a=>a&&a.nodeType==this.ELEMENT_NODE?i(a):a);l.unshift("Reader: (Readability)"),console.log.apply(console,l)}else if(typeof dump!="undefined"){var r=Array.prototype.map.call(arguments,function(l){return l&&l.nodeName?i(l):l}).join(" ");dump("Reader: (Readability) "+r+`\n`)}}}else 
this.log=function(){}}q.prototype={FLAG_STRIP_UNLIKELYS:1,FLAG_WEIGHT_CLASSES:2,FLAG_CLEAN_CONDITIONALLY:4,ELEMENT_NODE:1,TEXT_NODE:3,DEFAULT_MAX_ELEMS_TO_PARSE:0,DEFAULT_N_TOP_CANDIDATES:5,DEFAULT_TAGS_TO_SCORE:"section,h2,h3,h4,h5,h6,p,td,pre".toUpperCase().split(","),DEFAULT_CHAR_THRESHOLD:500,REGEXPS:{unlikelyCandidates:/-ad-|ai2html|banner|breadcrumbs|combx|comment|community|cover-wrap|disqus|extra|footer|gdpr|header|legends|menu|related|remark|replies|rss|shoutbox|sidebar|skyscraper|social|sponsor|supplemental|ad-break|agegate|pagination|pager|popup|yom-remote/i,okMaybeItsACandidate:/and|article|body|column|content|main|shadow/i,positive:/article|body|content|entry|hentry|h-entry|main|page|pagination|post|text|blog|story/i,negative:/-ad-|hidden|^hid$| hid$| hid |^hid |banner|combx|comment|com-|contact|foot|footer|footnote|gdpr|masthead|media|meta|outbrain|promo|related|scroll|share|shoutbox|sidebar|skyscraper|sponsor|shopping|tags|tool|widget/i,extraneous:/print|archive|comment|discuss|e[\\-]?mail|share|reply|all|login|sign|single|utility/i,byline:/byline|author|dateline|writtenby|p-author/i,replaceFonts:/<(\\/?)font[^>]*>/gi,normalize:/\\s{2,}/g,videos:/\\/\\/(www\\.)?((dailymotion|youtube|youtube-nocookie|player\\.vimeo|v\\.qq)\\.com|(archive|upload\\.wikimedia)\\.org|player\\.twitch\\.tv)/i,shareElements:/(\\b|_)(share|sharedaddy)(\\b|_)/i,nextLink:/(next|weiter|continue|>([^\\|]|$)|\xBB([^\\|]|$))/i,prevLink:/(prev|earl|old|new|<|\xAB)/i,tokenize:/\\W+/g,whitespace:/^\\s*$/,hasContent:/\\S$/,hashUrl:/^#.+/,srcsetUrl:/(\\S+)(\\s+[\\d.]+[xw])?(\\s*(?:,|$))/g,b64DataUrl:/^data:\\s*([^\\s;,]+)\\s*;\\s*base64\\s*,/i,commas:/\\u002C|\\u060C|\\uFE50|\\uFE10|\\uFE11|\\u2E41|\\u2E34|\\u2E32|\\uFF0C/g,jsonLdArticleTypes:/^Article|AdvertiserContentArticle|NewsArticle|AnalysisNewsArticle|AskPublicNewsArticle|BackgroundNewsArticle|OpinionNewsArticle|ReportageNewsArticle|ReviewNewsArticle|Report|SatiricalArticle|ScholarlyArticle|MedicalScholarlyArticle|SocialMediaPostin
g|BlogPosting|LiveBlogPosting|DiscussionForumPosting|TechArticle|APIReference$/},UNLIKELY_ROLES:["menu","menubar","complementary","navigation","alert","alertdialog","dialog"],DIV_TO_P_ELEMS:new Set(["BLOCKQUOTE","DL","DIV","IMG","OL","P","PRE","TABLE","UL"]),ALTER_TO_DIV_EXCEPTIONS:["DIV","ARTICLE","SECTION","P"],PRESENTATIONAL_ATTRIBUTES:["align","background","bgcolor","border","cellpadding","cellspacing","frame","hspace","rules","style","valign","vspace"],DEPRECATED_SIZE_ATTRIBUTE_ELEMS:["TABLE","TH","TD","HR","PRE"],PHRASING_ELEMS:["ABBR","AUDIO","B","BDO","BR","BUTTON","CITE","CODE","DATA","DATALIST","DFN","EM","EMBED","I","IMG","INPUT","KBD","LABEL","MARK","MATH","METER","NOSCRIPT","OBJECT","OUTPUT","PROGRESS","Q","RUBY","SAMP","SCRIPT","SELECT","SMALL","SPAN","STRONG","SUB","SUP","TEXTAREA","TIME","VAR","WBR"],CLASSES_TO_PRESERVE:["page"],HTML_ESCAPE_MAP:{lt:"<",gt:">",amp:"&",quot:\'"\',apos:"\'"},_postProcessContent:function(t){this._fixRelativeUris(t),this._simplifyNestedElements(t),this._keepClasses||this._cleanClasses(t)},_removeNodes:function(t,e){if(this._docJSDOMParser&&t._isLiveNodeList)throw new Error("Do not pass live node lists to _removeNodes");for(var i=t.length-1;i>=0;i--){var r=t[i],l=r.parentNode;l&&(!e||e.call(this,r,i,t))&&l.removeChild(r)}},_replaceNodeTags:function(t,e){if(this._docJSDOMParser&&t._isLiveNodeList)throw new Error("Do not pass live node lists to _replaceNodeTags");for(let i of t)this._setNodeTag(i,e)},_forEachNode:function(t,e){Array.prototype.forEach.call(t,e,this)},_findNode:function(t,e){return Array.prototype.find.call(t,e,this)},_someNode:function(t,e){return Array.prototype.some.call(t,e,this)},_everyNode:function(t,e){return Array.prototype.every.call(t,e,this)},_concatNodeLists:function(){var t=Array.prototype.slice,e=t.call(arguments),i=e.map(function(r){return t.call(r)});return Array.prototype.concat.apply([],i)},_getAllNodesWithTag:function(t,e){return 
t.querySelectorAll?t.querySelectorAll(e.join(",")):[].concat.apply([],e.map(function(i){var r=t.getElementsByTagName(i);return Array.isArray(r)?r:Array.from(r)}))},_cleanClasses:function(t){var e=this._classesToPreserve,i=(t.getAttribute("class")||"").split(/\\s+/).filter(function(r){return e.indexOf(r)!=-1}).join(" ");for(i?t.setAttribute("class",i):t.removeAttribute("class"),t=t.firstElementChild;t;t=t.nextElementSibling)this._cleanClasses(t)},_fixRelativeUris:function(t){var e=this._doc.baseURI,i=this._doc.documentURI;function r(s){if(e==i&&s.charAt(0)=="#")return s;try{return new URL(s,e).href}catch(h){}return s}var l=this._getAllNodesWithTag(t,["a"]);this._forEachNode(l,function(s){var h=s.getAttribute("href");if(h)if(h.indexOf("javascript:")===0)if(s.childNodes.length===1&&s.childNodes[0].nodeType===this.TEXT_NODE){var c=this._doc.createTextNode(s.textContent);s.parentNode.replaceChild(c,s)}else{for(var n=this._doc.createElement("span");s.firstChild;)n.appendChild(s.firstChild);s.parentNode.replaceChild(n,s)}else s.setAttribute("href",r(h))});var a=this._getAllNodesWithTag(t,["img","picture","figure","video","audio","source"]);this._forEachNode(a,function(s){var h=s.getAttribute("src"),c=s.getAttribute("poster"),n=s.getAttribute("srcset");if(h&&s.setAttribute("src",r(h)),c&&s.setAttribute("poster",r(c)),n){var u=n.replace(this.REGEXPS.srcsetUrl,function(m,b,N,v){return r(b)+(N||"")+v});s.setAttribute("srcset",u)}})},_simplifyNestedElements:function(t){for(var e=t;e;){if(e.parentNode&&["DIV","SECTION"].includes(e.tagName)&&!(e.id&&e.id.startsWith("readability"))){if(this._isElementWithoutContent(e)){e=this._removeAndGetNext(e);continue}else if(this._hasSingleTagInsideElement(e,"DIV")||this._hasSingleTagInsideElement(e,"SECTION")){for(var i=e.children[0],r=0;r<e.attributes.length;r++)i.setAttribute(e.attributes[r].name,e.attributes[r].value);e.parentNode.replaceChild(i,e),e=i;continue}}e=this._getNextNode(e)}},_getArticleTitle:function(){var 
t=this._doc,e="",i="";try{e=i=t.title.trim(),typeof e!="string"&&(e=i=this._getInnerText(t.getElementsByTagName("title")[0]))}catch(u){}var r=!1;function l(u){return u.split(/\\s+/).length}if(/ [\\|\\-\\\\\\/>\xBB] /.test(e))r=/ [\\\\\\/>\xBB] /.test(e),e=i.replace(/(.*)[\\|\\-\\\\\\/>\xBB] .*/gi,"$1"),l(e)<3&&(e=i.replace(/[^\\|\\-\\\\\\/>\xBB]*[\\|\\-\\\\\\/>\xBB](.*)/gi,"$1"));else if(e.indexOf(": ")!==-1){var a=this._concatNodeLists(t.getElementsByTagName("h1"),t.getElementsByTagName("h2")),s=e.trim(),h=this._someNode(a,function(u){return u.textContent.trim()===s});h||(e=i.substring(i.lastIndexOf(":")+1),l(e)<3?e=i.substring(i.indexOf(":")+1):l(i.substr(0,i.indexOf(":")))>5&&(e=i))}else if(e.length>150||e.length<15){var c=t.getElementsByTagName("h1");c.length===1&&(e=this._getInnerText(c[0]))}e=e.trim().replace(this.REGEXPS.normalize," ");var n=l(e);return n<=4&&(!r||n!=l(i.replace(/[\\|\\-\\\\\\/>\xBB]+/g,""))-1)&&(e=i),e},_prepDocument:function(){var t=this._doc;this._removeNodes(this._getAllNodesWithTag(t,["style"])),t.body&&this._replaceBrs(t.body),this._replaceNodeTags(this._getAllNodesWithTag(t,["font"]),"SPAN")},_nextNode:function(t){for(var e=t;e&&e.nodeType!=this.ELEMENT_NODE&&this.REGEXPS.whitespace.test(e.textContent);)e=e.nextSibling;return e},_replaceBrs:function(t){this._forEachNode(this._getAllNodesWithTag(t,["br"]),function(e){for(var i=e.nextSibling,r=!1;(i=this._nextNode(i))&&i.tagName=="BR";){r=!0;var l=i.nextSibling;i.parentNode.removeChild(i),i=l}if(r){var a=this._doc.createElement("p");for(e.parentNode.replaceChild(a,e),i=a.nextSibling;i;){if(i.tagName=="BR"){var s=this._nextNode(i.nextSibling);if(s&&s.tagName=="BR")break}if(!this._isPhrasingContent(i))break;var h=i.nextSibling;a.appendChild(i),i=h}for(;a.lastChild&&this._isWhitespace(a.lastChild);)a.removeChild(a.lastChild);a.parentNode.tagName==="P"&&this._setNodeTag(a.parentNode,"DIV")}})},_setNodeTag:function(t,e){if(this.log("_setNodeTag",t,e),this._docJSDOMParser)return 
t.localName=e.toLowerCase(),t.tagName=e.toUpperCase(),t;for(var i=t.ownerDocument.createElement(e);t.firstChild;)i.appendChild(t.firstChild);t.parentNode.replaceChild(i,t),t.readability&&(i.readability=t.readability);for(var r=0;r<t.attributes.length;r++)try{i.setAttribute(t.attributes[r].name,t.attributes[r].value)}catch(l){}return i},_prepArticle:function(t){this._cleanStyles(t),this._markDataTables(t),this._fixLazyImages(t),this._cleanConditionally(t,"form"),this._cleanConditionally(t,"fieldset"),this._clean(t,"object"),this._clean(t,"embed"),this._clean(t,"footer"),this._clean(t,"link"),this._clean(t,"aside");var e=this.DEFAULT_CHAR_THRESHOLD;this._forEachNode(t.children,function(i){this._cleanMatchedNodes(i,function(r,l){return this.REGEXPS.shareElements.test(l)&&r.textContent.length<e})}),this._clean(t,"iframe"),this._clean(t,"input"),this._clean(t,"textarea"),this._clean(t,"select"),this._clean(t,"button"),this._cleanHeaders(t),this._cleanConditionally(t,"table"),this._cleanConditionally(t,"ul"),this._cleanConditionally(t,"div"),this._replaceNodeTags(this._getAllNodesWithTag(t,["h1"]),"h2"),this._removeNodes(this._getAllNodesWithTag(t,["p"]),function(i){var r=i.getElementsByTagName("img").length,l=i.getElementsByTagName("embed").length,a=i.getElementsByTagName("object").length,s=i.getElementsByTagName("iframe").length,h=r+l+a+s;return h===0&&!this._getInnerText(i,!1)}),this._forEachNode(this._getAllNodesWithTag(t,["br"]),function(i){var r=this._nextNode(i.nextSibling);r&&r.tagName=="P"&&i.parentNode.removeChild(i)}),this._forEachNode(this._getAllNodesWithTag(t,["table"]),function(i){var r=this._hasSingleTagInsideElement(i,"TBODY")?i.firstElementChild:i;if(this._hasSingleTagInsideElement(r,"TR")){var l=r.firstElementChild;if(this._hasSingleTagInsideElement(l,"TD")){var 
a=l.firstElementChild;a=this._setNodeTag(a,this._everyNode(a.childNodes,this._isPhrasingContent)?"P":"DIV"),i.parentNode.replaceChild(a,i)}}})},_initializeNode:function(t){switch(t.readability={contentScore:0},t.tagName){case"DIV":t.readability.contentScore+=5;break;case"PRE":case"TD":case"BLOCKQUOTE":t.readability.contentScore+=3;break;case"ADDRESS":case"OL":case"UL":case"DL":case"DD":case"DT":case"LI":case"FORM":t.readability.contentScore-=3;break;case"H1":case"H2":case"H3":case"H4":case"H5":case"H6":case"TH":t.readability.contentScore-=5;break}t.readability.contentScore+=this._getClassWeight(t)},_removeAndGetNext:function(t){var e=this._getNextNode(t,!0);return t.parentNode.removeChild(t),e},_getNextNode:function(t,e){if(!e&&t.firstElementChild)return t.firstElementChild;if(t.nextElementSibling)return t.nextElementSibling;do t=t.parentNode;while(t&&!t.nextElementSibling);return t&&t.nextElementSibling},_textSimilarity:function(t,e){var i=t.toLowerCase().split(this.REGEXPS.tokenize).filter(Boolean),r=e.toLowerCase().split(this.REGEXPS.tokenize).filter(Boolean);if(!i.length||!r.length)return 0;var l=r.filter(s=>!i.includes(s)),a=l.join(" ").length/r.join(" ").length;return 1-a},_checkByline:function(t,e){if(this._articleByline)return!1;if(t.getAttribute!==void 0)var i=t.getAttribute("rel"),r=t.getAttribute("itemprop");return(i==="author"||r&&r.indexOf("author")!==-1||this.REGEXPS.byline.test(e))&&this._isValidByline(t.textContent)?(this._articleByline=t.textContent.trim(),!0):!1},_getNodeAncestors:function(t,e){e=e||0;for(var i=0,r=[];t.parentNode&&(r.push(t.parentNode),!(e&&++i===e));)t=t.parentNode;return r},_grabArticle:function(t){this.log("**** grabArticle ****");var e=this._doc,i=t!==null;if(t=t||this._doc.body,!t)return this.log("No body found in document. 
Abort."),null;for(var r=t.innerHTML;;){this.log("Starting grabArticle loop");var l=this._flagIsActive(this.FLAG_STRIP_UNLIKELYS),a=[],s=this._doc.documentElement;let J=!0;for(;s;){s.tagName==="HTML"&&(this._articleLang=s.getAttribute("lang"));var h=s.className+" "+s.id;if(!this._isProbablyVisible(s)){this.log("Removing hidden node - "+h),s=this._removeAndGetNext(s);continue}if(s.getAttribute("aria-modal")=="true"&&s.getAttribute("role")=="dialog"){s=this._removeAndGetNext(s);continue}if(this._checkByline(s,h)){s=this._removeAndGetNext(s);continue}if(J&&this._headerDuplicatesTitle(s)){this.log("Removing header: ",s.textContent.trim(),this._articleTitle.trim()),J=!1,s=this._removeAndGetNext(s);continue}if(l){if(this.REGEXPS.unlikelyCandidates.test(h)&&!this.REGEXPS.okMaybeItsACandidate.test(h)&&!this._hasAncestorTag(s,"table")&&!this._hasAncestorTag(s,"code")&&s.tagName!=="BODY"&&s.tagName!=="A"){this.log("Removing unlikely candidate - "+h),s=this._removeAndGetNext(s);continue}if(this.UNLIKELY_ROLES.includes(s.getAttribute("role"))){this.log("Removing content with role "+s.getAttribute("role")+" - "+h),s=this._removeAndGetNext(s);continue}}if((s.tagName==="DIV"||s.tagName==="SECTION"||s.tagName==="HEADER"||s.tagName==="H1"||s.tagName==="H2"||s.tagName==="H3"||s.tagName==="H4"||s.tagName==="H5"||s.tagName==="H6")&&this._isElementWithoutContent(s)){s=this._removeAndGetNext(s);continue}if(this.DEFAULT_TAGS_TO_SCORE.indexOf(s.tagName)!==-1&&a.push(s),s.tagName==="DIV"){for(var c=null,n=s.firstChild;n;){var u=n.nextSibling;if(this._isPhrasingContent(n))c!==null?c.appendChild(n):this._isWhitespace(n)||(c=e.createElement("p"),s.replaceChild(c,n),c.appendChild(n));else if(c!==null){for(;c.lastChild&&this._isWhitespace(c.lastChild);)c.removeChild(c.lastChild);c=null}n=u}if(this._hasSingleTagInsideElement(s,"P")&&this._getLinkDensity(s)<.25){var m=s.children[0];s.parentNode.replaceChild(m,s),s=m,a.push(s)}else 
this._hasChildBlockElement(s)||(s=this._setNodeTag(s,"P"),a.push(s))}s=this._getNextNode(s)}var b=[];this._forEachNode(a,function(A){if(!(!A.parentNode||typeof A.parentNode.tagName=="undefined")){var T=this._getInnerText(A);if(!(T.length<25)){var K=this._getNodeAncestors(A,5);if(K.length!==0){var C=0;C+=1,C+=T.split(this.REGEXPS.commas).length,C+=Math.min(Math.floor(T.length/100),3),this._forEachNode(K,function(S,F){if(!(!S.tagName||!S.parentNode||typeof S.parentNode.tagName=="undefined")){if(typeof S.readability=="undefined"&&(this._initializeNode(S),b.push(S)),F===0)var X=1;else F===1?X=2:X=F*3;S.readability.contentScore+=C/X}})}}}});for(var N=[],v=0,y=b.length;v<y;v+=1){var E=b[v],d=E.readability.contentScore*(1-this._getLinkDensity(E));E.readability.contentScore=d,this.log("Candidate:",E,"with score "+d);for(var p=0;p<this._nbTopCandidates;p++){var x=N[p];if(!x||d>x.readability.contentScore){N.splice(p,0,E),N.length>this._nbTopCandidates&&N.pop();break}}}var o=N[0]||null,L=!1,g;if(o===null||o.tagName==="BODY"){for(o=e.createElement("DIV"),L=!0;t.firstChild;)this.log("Moving child out:",t.firstChild),o.appendChild(t.firstChild);t.appendChild(o),this._initializeNode(o)}else if(o){for(var I=[],P=1;P<N.length;P++)N[P].readability.contentScore/o.readability.contentScore>=.75&&I.push(this._getNodeAncestors(N[P]));var O=3;if(I.length>=O)for(g=o.parentNode;g.tagName!=="BODY";){for(var G=0,H=0;H<I.length&&G<O;H++)G+=Number(I[H].includes(g));if(G>=O){o=g;break}g=g.parentNode}o.readability||this._initializeNode(o),g=o.parentNode;for(var M=o.readability.contentScore,Q=M/3;g.tagName!=="BODY";){if(!g.readability){g=g.parentNode;continue}var V=g.readability.contentScore;if(V<Q)break;if(V>M){o=g;break}M=g.readability.contentScore,g=g.parentNode}for(g=o.parentNode;g.tagName!="BODY"&&g.children.length==1;)o=g,g=o.parentNode;o.readability||this._initializeNode(o)}var _=e.createElement("DIV");i&&(_.id="readability-content");var 
Z=Math.max(10,o.readability.contentScore*.2);g=o.parentNode;for(var U=g.children,w=0,j=U.length;w<j;w++){var f=U[w],R=!1;if(this.log("Looking at sibling node:",f,f.readability?"with score "+f.readability.contentScore:""),this.log("Sibling has score",f.readability?f.readability.contentScore:"Unknown"),f===o)R=!0;else{var $=0;if(f.className===o.className&&o.className!==""&&($+=o.readability.contentScore*.2),f.readability&&f.readability.contentScore+$>=Z)R=!0;else if(f.nodeName==="P"){var Y=this._getLinkDensity(f),z=this._getInnerText(f),k=z.length;(k>80&&Y<.25||k<80&&k>0&&Y===0&&z.search(/\\.( |$)/)!==-1)&&(R=!0)}}R&&(this.log("Appending node:",f),this.ALTER_TO_DIV_EXCEPTIONS.indexOf(f.nodeName)===-1&&(this.log("Altering sibling:",f,"to div."),f=this._setNodeTag(f,"DIV")),_.appendChild(f),U=g.children,w-=1,j-=1)}if(this._debug&&this.log("Article content pre-prep: "+_.innerHTML),this._prepArticle(_),this._debug&&this.log("Article content post-prep: "+_.innerHTML),L)o.id="readability-page-1",o.className="page";else{var B=e.createElement("DIV");for(B.id="readability-page-1",B.className="page";_.firstChild;)B.appendChild(_.firstChild);_.appendChild(B)}this._debug&&this.log("Article content after paging: "+_.innerHTML);var W=!0,D=this._getInnerText(_,!0).length;if(D<this._charThreshold)if(W=!1,t.innerHTML=r,this._flagIsActive(this.FLAG_STRIP_UNLIKELYS))this._removeFlag(this.FLAG_STRIP_UNLIKELYS),this._attempts.push({articleContent:_,textLength:D});else if(this._flagIsActive(this.FLAG_WEIGHT_CLASSES))this._removeFlag(this.FLAG_WEIGHT_CLASSES),this._attempts.push({articleContent:_,textLength:D});else if(this._flagIsActive(this.FLAG_CLEAN_CONDITIONALLY))this._removeFlag(this.FLAG_CLEAN_CONDITIONALLY),this._attempts.push({articleContent:_,textLength:D});else{if(this._attempts.push({articleContent:_,textLength:D}),this._attempts.sort(function(A,T){return T.textLength-A.textLength}),!this._attempts[0].textLength)return null;_=this._attempts[0].articleContent,W=!0}if(W){var 
tt=[g,o].concat(this._getNodeAncestors(g));return this._someNode(tt,function(A){if(!A.tagName)return!1;var T=A.getAttribute("dir");return T?(this._articleDir=T,!0):!1}),_}}},_isValidByline:function(t){return typeof t=="string"||t instanceof String?(t=t.trim(),t.length>0&&t.length<100):!1},_unescapeHtmlEntities:function(t){if(!t)return t;var e=this.HTML_ESCAPE_MAP;return t.replace(/&(quot|amp|apos|lt|gt);/g,function(i,r){return e[r]}).replace(/&#(?:x([0-9a-z]{1,4})|([0-9]{1,4}));/gi,function(i,r,l){var a=parseInt(r||l,r?16:10);return String.fromCharCode(a)})},_getJSONLD:function(t){var e=this._getAllNodesWithTag(t,["script"]),i;return this._forEachNode(e,function(r){if(!i&&r.getAttribute("type")==="application/ld+json")try{var l=r.textContent.replace(/^\\s*<!\\[CDATA\\[|\\]\\]>\\s*$/g,""),a=JSON.parse(l);if(!a["@context"]||!a["@context"].match(/^https?\\:\\/\\/schema\\.org$/)||(!a["@type"]&&Array.isArray(a["@graph"])&&(a=a["@graph"].find(function(n){return(n["@type"]||"").match(this.REGEXPS.jsonLdArticleTypes)})),!a||!a["@type"]||!a["@type"].match(this.REGEXPS.jsonLdArticleTypes)))return;if(i={},typeof a.name=="string"&&typeof a.headline=="string"&&a.name!==a.headline){var s=this._getArticleTitle(),h=this._textSimilarity(a.name,s)>.75,c=this._textSimilarity(a.headline,s)>.75;c&&!h?i.title=a.headline:i.title=a.name}else typeof a.name=="string"?i.title=a.name.trim():typeof a.headline=="string"&&(i.title=a.headline.trim());a.author&&(typeof a.author.name=="string"?i.byline=a.author.name.trim():Array.isArray(a.author)&&a.author[0]&&typeof a.author[0].name=="string"&&(i.byline=a.author.filter(function(n){return n&&typeof n.name=="string"}).map(function(n){return n.name.trim()}).join(", "))),typeof a.description=="string"&&(i.excerpt=a.description.trim()),a.publisher&&typeof a.publisher.name=="string"&&(i.siteName=a.publisher.name.trim()),typeof 
a.datePublished=="string"&&(i.datePublished=a.datePublished.trim());return}catch(n){this.log(n.message)}}),i||{}},_getArticleMetadata:function(t){var e={},i={},r=this._doc.getElementsByTagName("meta"),l=/\\s*(article|dc|dcterm|og|twitter)\\s*:\\s*(author|creator|description|published_time|title|site_name)\\s*/gi,a=/^\\s*(?:(dc|dcterm|og|twitter|weibo:(article|webpage))\\s*[\\.:]\\s*)?(author|creator|description|title|site_name)\\s*$/i;return this._forEachNode(r,function(s){var h=s.getAttribute("name"),c=s.getAttribute("property"),n=s.getAttribute("content");if(n){var u=null,m=null;c&&(u=c.match(l),u&&(m=u[0].toLowerCase().replace(/\\s/g,""),i[m]=n.trim())),!u&&h&&a.test(h)&&(m=h,n&&(m=m.toLowerCase().replace(/\\s/g,"").replace(/\\./g,":"),i[m]=n.trim()))}}),e.title=t.title||i["dc:title"]||i["dcterm:title"]||i["og:title"]||i["weibo:article:title"]||i["weibo:webpage:title"]||i.title||i["twitter:title"],e.title||(e.title=this._getArticleTitle()),e.byline=t.byline||i["dc:creator"]||i["dcterm:creator"]||i.author,e.excerpt=t.excerpt||i["dc:description"]||i["dcterm:description"]||i["og:description"]||i["weibo:article:description"]||i["weibo:webpage:description"]||i.description||i["twitter:description"],e.siteName=t.siteName||i["og:site_name"],e.publishedTime=t.datePublished||i["article:published_time"]||null,e.title=this._unescapeHtmlEntities(e.title),e.byline=this._unescapeHtmlEntities(e.byline),e.excerpt=this._unescapeHtmlEntities(e.excerpt),e.siteName=this._unescapeHtmlEntities(e.siteName),e.publishedTime=this._unescapeHtmlEntities(e.publishedTime),e},_isSingleImage:function(t){return t.tagName==="IMG"?!0:t.children.length!==1||t.textContent.trim()!==""?!1:this._isSingleImage(t.children[0])},_unwrapNoscriptImages:function(t){var e=Array.from(t.getElementsByTagName("img"));this._forEachNode(e,function(r){for(var l=0;l<r.attributes.length;l++){var 
a=r.attributes[l];switch(a.name){case"src":case"srcset":case"data-src":case"data-srcset":return}if(/\\.(jpg|jpeg|png|webp)/i.test(a.value))return}r.parentNode.removeChild(r)});var i=Array.from(t.getElementsByTagName("noscript"));this._forEachNode(i,function(r){var l=t.createElement("div");if(l.innerHTML=r.innerHTML,!!this._isSingleImage(l)){var a=r.previousElementSibling;if(a&&this._isSingleImage(a)){var s=a;s.tagName!=="IMG"&&(s=a.getElementsByTagName("img")[0]);for(var h=l.getElementsByTagName("img")[0],c=0;c<s.attributes.length;c++){var n=s.attributes[c];if(n.value!==""&&(n.name==="src"||n.name==="srcset"||/\\.(jpg|jpeg|png|webp)/i.test(n.value))){if(h.getAttribute(n.name)===n.value)continue;var u=n.name;h.hasAttribute(u)&&(u="data-old-"+u),h.setAttribute(u,n.value)}}r.parentNode.replaceChild(l.firstElementChild,a)}}})},_removeScripts:function(t){this._removeNodes(this._getAllNodesWithTag(t,["script","noscript"]))},_hasSingleTagInsideElement:function(t,e){return t.children.length!=1||t.children[0].tagName!==e?!1:!this._someNode(t.childNodes,function(i){return i.nodeType===this.TEXT_NODE&&this.REGEXPS.hasContent.test(i.textContent)})},_isElementWithoutContent:function(t){return t.nodeType===this.ELEMENT_NODE&&t.textContent.trim().length==0&&(t.children.length==0||t.children.length==t.getElementsByTagName("br").length+t.getElementsByTagName("hr").length)},_hasChildBlockElement:function(t){return this._someNode(t.childNodes,function(e){return this.DIV_TO_P_ELEMS.has(e.tagName)||this._hasChildBlockElement(e)})},_isPhrasingContent:function(t){return t.nodeType===this.TEXT_NODE||this.PHRASING_ELEMS.indexOf(t.tagName)!==-1||(t.tagName==="A"||t.tagName==="DEL"||t.tagName==="INS")&&this._everyNode(t.childNodes,this._isPhrasingContent)},_isWhitespace:function(t){return t.nodeType===this.TEXT_NODE&&t.textContent.trim().length===0||t.nodeType===this.ELEMENT_NODE&&t.tagName==="BR"},_getInnerText:function(t,e){e=typeof e=="undefined"?!0:e;var i=t.textContent.trim();return 
e?i.replace(this.REGEXPS.normalize," "):i},_getCharCount:function(t,e){return e=e||",",this._getInnerText(t).split(e).length-1},_cleanStyles:function(t){if(!(!t||t.tagName.toLowerCase()==="svg")){for(var e=0;e<this.PRESENTATIONAL_ATTRIBUTES.length;e++)t.removeAttribute(this.PRESENTATIONAL_ATTRIBUTES[e]);this.DEPRECATED_SIZE_ATTRIBUTE_ELEMS.indexOf(t.tagName)!==-1&&(t.removeAttribute("width"),t.removeAttribute("height"));for(var i=t.firstElementChild;i!==null;)this._cleanStyles(i),i=i.nextElementSibling}},_getLinkDensity:function(t){var e=this._getInnerText(t).length;if(e===0)return 0;var i=0;return this._forEachNode(t.getElementsByTagName("a"),function(r){var l=r.getAttribute("href"),a=l&&this.REGEXPS.hashUrl.test(l)?.3:1;i+=this._getInnerText(r).length*a}),i/e},_getClassWeight:function(t){if(!this._flagIsActive(this.FLAG_WEIGHT_CLASSES))return 0;var e=0;return typeof t.className=="string"&&t.className!==""&&(this.REGEXPS.negative.test(t.className)&&(e-=25),this.REGEXPS.positive.test(t.className)&&(e+=25)),typeof t.id=="string"&&t.id!==""&&(this.REGEXPS.negative.test(t.id)&&(e-=25),this.REGEXPS.positive.test(t.id)&&(e+=25)),e},_clean:function(t,e){var i=["object","embed","iframe"].indexOf(e)!==-1;this._removeNodes(this._getAllNodesWithTag(t,[e]),function(r){if(i){for(var l=0;l<r.attributes.length;l++)if(this._allowedVideoRegex.test(r.attributes[l].value))return!1;if(r.tagName==="object"&&this._allowedVideoRegex.test(r.innerHTML))return!1}return!0})},_hasAncestorTag:function(t,e,i,r){i=i||3,e=e.toUpperCase();for(var l=0;t.parentNode;){if(i>0&&l>i)return!1;if(t.parentNode.tagName===e&&(!r||r(t.parentNode)))return!0;t=t.parentNode,l++}return!1},_getRowAndColumnCount:function(t){for(var e=0,i=0,r=t.getElementsByTagName("tr"),l=0;l<r.length;l++){var a=r[l].getAttribute("rowspan")||0;a&&(a=parseInt(a,10)),e+=a||1;for(var s=0,h=r[l].getElementsByTagName("td"),c=0;c<h.length;c++){var 
n=h[c].getAttribute("colspan")||0;n&&(n=parseInt(n,10)),s+=n||1}i=Math.max(i,s)}return{rows:e,columns:i}},_markDataTables:function(t){for(var e=t.getElementsByTagName("table"),i=0;i<e.length;i++){var r=e[i],l=r.getAttribute("role");if(l=="presentation"){r._readabilityDataTable=!1;continue}var a=r.getAttribute("datatable");if(a=="0"){r._readabilityDataTable=!1;continue}var s=r.getAttribute("summary");if(s){r._readabilityDataTable=!0;continue}var h=r.getElementsByTagName("caption")[0];if(h&&h.childNodes.length>0){r._readabilityDataTable=!0;continue}var c=["col","colgroup","tfoot","thead","th"],n=function(m){return!!r.getElementsByTagName(m)[0]};if(c.some(n)){this.log("Data table because found data-y descendant"),r._readabilityDataTable=!0;continue}if(r.getElementsByTagName("table")[0]){r._readabilityDataTable=!1;continue}var u=this._getRowAndColumnCount(r);if(u.rows>=10||u.columns>4){r._readabilityDataTable=!0;continue}r._readabilityDataTable=u.rows*u.columns>10}},_fixLazyImages:function(t){this._forEachNode(this._getAllNodesWithTag(t,["img","picture","figure"]),function(e){if(e.src&&this.REGEXPS.b64DataUrl.test(e.src)){var i=this.REGEXPS.b64DataUrl.exec(e.src);if(i[1]==="image/svg+xml")return;for(var r=!1,l=0;l<e.attributes.length;l++){var a=e.attributes[l];if(a.name!=="src"&&/\\.(jpg|jpeg|png|webp)/i.test(a.value)){r=!0;break}}if(r){var s=e.src.search(/base64\\s*/i)+7,h=e.src.length-s;h<133&&e.removeAttribute("src")}}if(!((e.src||e.srcset&&e.srcset!="null")&&e.className.toLowerCase().indexOf("lazy")===-1)){for(var c=0;c<e.attributes.length;c++)if(a=e.attributes[c],!(a.name==="src"||a.name==="srcset"||a.name==="alt")){var n=null;if(/\\.(jpg|jpeg|png|webp)\\s+\\d/.test(a.value)?n="srcset":/^\\s*\\S+\\.(jpg|jpeg|png|webp)\\S*\\s*$/.test(a.value)&&(n="src"),n){if(e.tagName==="IMG"||e.tagName==="PICTURE")e.setAttribute(n,a.value);else if(e.tagName==="FIGURE"&&!this._getAllNodesWithTag(e,["img","picture"]).length){var 
u=this._doc.createElement("img");u.setAttribute(n,a.value),e.appendChild(u)}}}}})},_getTextDensity:function(t,e){var i=this._getInnerText(t,!0).length;if(i===0)return 0;var r=0,l=this._getAllNodesWithTag(t,e);return this._forEachNode(l,a=>r+=this._getInnerText(a,!0).length),r/i},_cleanConditionally:function(t,e){this._flagIsActive(this.FLAG_CLEAN_CONDITIONALLY)&&this._removeNodes(this._getAllNodesWithTag(t,[e]),function(i){var r=function(g){return g._readabilityDataTable},l=e==="ul"||e==="ol";if(!l){var a=0,s=this._getAllNodesWithTag(i,["ul","ol"]);this._forEachNode(s,g=>a+=this._getInnerText(g).length),l=a/this._getInnerText(i).length>.9}if(e==="table"&&r(i)||this._hasAncestorTag(i,"table",-1,r)||this._hasAncestorTag(i,"code"))return!1;var h=this._getClassWeight(i);this.log("Cleaning Conditionally",i);var c=0;if(h+c<0)return!0;if(this._getCharCount(i,",")<10){for(var n=i.getElementsByTagName("p").length,u=i.getElementsByTagName("img").length,m=i.getElementsByTagName("li").length-100,b=i.getElementsByTagName("input").length,N=this._getTextDensity(i,["h1","h2","h3","h4","h5","h6"]),v=0,y=this._getAllNodesWithTag(i,["object","embed","iframe"]),E=0;E<y.length;E++){for(var d=0;d<y[E].attributes.length;d++)if(this._allowedVideoRegex.test(y[E].attributes[d].value))return!1;if(y[E].tagName==="object"&&this._allowedVideoRegex.test(y[E].innerHTML))return!1;v++}var p=this._getLinkDensity(i),x=this._getInnerText(i).length,o=u>1&&n/u<.5&&!this._hasAncestorTag(i,"figure")||!l&&m>n||b>Math.floor(n/3)||!l&&N<.9&&x<25&&(u===0||u>2)&&!this._hasAncestorTag(i,"figure")||!l&&h<25&&p>.2||h>=25&&p>.5||v===1&&x<75||v>1;if(l&&o){for(var L=0;L<i.children.length;L++)if(i.children[L].children.length>1)return o;let g=i.getElementsByTagName("li").length;if(u==g)return!1}return o}return!1})},_cleanMatchedNodes:function(t,e){for(var i=this._getNextNode(t,!0),r=this._getNextNode(t);r&&r!=i;)e.call(this,r,r.className+" 
"+r.id)?r=this._removeAndGetNext(r):r=this._getNextNode(r)},_cleanHeaders:function(t){let e=this._getAllNodesWithTag(t,["h1","h2"]);this._removeNodes(e,function(i){let r=this._getClassWeight(i)<0;return r&&this.log("Removing header with low class weight:",i),r})},_headerDuplicatesTitle:function(t){if(t.tagName!="H1"&&t.tagName!="H2")return!1;var e=this._getInnerText(t,!1);return this.log("Evaluating similarity of header:",e,this._articleTitle),this._textSimilarity(this._articleTitle,e)>.75},_flagIsActive:function(t){return(this._flags&t)>0},_removeFlag:function(t){this._flags=this._flags&~t},_isProbablyVisible:function(t){return(!t.style||t.style.display!="none")&&(!t.style||t.style.visibility!="hidden")&&!t.hasAttribute("hidden")&&(!t.hasAttribute("aria-hidden")||t.getAttribute("aria-hidden")!="true"||t.className&&t.className.indexOf&&t.className.indexOf("fallback-image")!==-1)},parse:function(){if(this._maxElemsToParse>0){var t=this._doc.getElementsByTagName("*").length;if(t>this._maxElemsToParse)throw new Error("Aborting parsing document; "+t+" elements found")}this._unwrapNoscriptImages(this._doc);var e=this._disableJSONLD?{}:this._getJSONLD(this._doc);this._removeScripts(this._doc),this._prepDocument();var i=this._getArticleMetadata(e);this._articleTitle=i.title;var r=this._grabArticle();if(!r)return null;if(this.log("Grabbed: "+r.innerHTML),this._postProcessContent(r),!i.excerpt){var l=r.getElementsByTagName("p");l.length>0&&(i.excerpt=l[0].textContent.trim())}var a=r.textContent;return{title:this._articleTitle,byline:i.byline||this._articleByline,dir:this._articleDir,lang:this._articleLang,content:this._serializer(r),textContent:a,length:a.length,excerpt:i.excerpt,siteName:i.siteName||this._articleSiteName,publishedTime:i.publishedTime}}};typeof module=="object"&&(module.exports=q);\n';var le=require("@agent-infra/logger");var 
te=m(require("turndown"),1),re=require("turndown-plugin-gfm"),ie=require("@agent-infra/logger"),ne=m(require("user-agents"),1),De=i=>{try{return new URL(i)}catch{return null}},se=i=>{let t=De(i);if(!t)return!0;let{hostname:e}=t;return["reddit.com","www.reddit.com","x.com","twitter.com","www.twitter.com","youtube.com","www.youtube.com"].includes(e)};async function Be(i){let t=new ne.default({deviceCategory:"desktop"}).toString();await i.setBypassCSP(!0),await i.setUserAgent(t),await i.evaluate(()=>{Object.defineProperty(navigator,"webdriver",{get:()=>{}}),Object.defineProperty(navigator,"languages",{get:()=>["en-US","en"]}),Object.defineProperty(navigator,"plugins",{get:()=>[{},{},{},{},{}]}),Object.defineProperty(navigator,"headless",{get:()=>!1});let e=window.navigator.permissions.query;window.navigator.permissions.query=r=>r.name==="notifications"?Promise.resolve({state:Notification.permission}):e(r)})}async function $(i){await Be(i),await i.setRequestInterception(!0),i.on("request",t=>t.resourceType()!=="document"?t.abort():t.isNavigationRequest()?t.continue():t.abort())}function ae(i,t){let e=new Function("module",`${t}
|
|
3
|
+
return module.exports`)({}),r=i.document;r.querySelectorAll("script,noscript,style,link,svg,img,video,iframe,canvas,.reflist").forEach(s=>s.remove());let n=new e(r).parse(),a=n?.content||"",o=r.title;return{content:a,title:n?.title||o}}function oe(i,t={}){if(!i)return"";try{let{codeBlockStyle:e="fenced",headingStyle:r="atx",emDelimiter:n="*",strongDelimiter:a="**",gfmExtension:o=!0}=t,s=new te.default({codeBlockStyle:e,headingStyle:r,emDelimiter:n,strongDelimiter:a});return o&&s.use(re.gfm),s.turndown(i)}catch(e){return ie.defaultLogger.error("Error converting HTML to Markdown:",e),i}}var L=class{queue=[];concurrency;running=0;results=[];constructor(t=1){this.concurrency=t}add(t){return new Promise((e,r)=>{this.queue.push(async()=>{try{let n=await t();return e(n),n}catch(n){throw r(n),n}}),this.run()})}async run(){if(this.running>=this.concurrency||this.queue.length===0)return;this.running++;let t=this.queue.shift();try{let e=await t();this.results.push(e)}catch{}finally{this.running--,this.run()}}async waitAll(){for(;this.running>0||this.queue.length>0;)await new Promise(t=>setTimeout(t,100));return this.results}};var v=class{getSearchUrl(t,e){return`https://www.bing.com/search?${new URLSearchParams({q:`${e.excludeDomains&&e.excludeDomains.length>0?`${e.excludeDomains.map(n=>`-site:${n}`).join(" ")} `:""}${t}`,count:`${e.count||10}`}).toString()}`}extractSearchResults(t){let e=[],r=t.document,n=o=>{try{return new URL(o),!0}catch{return!1}},a=o=>{let s=o.cloneNode(!0);return s.querySelectorAll("h2").forEach(l=>l.remove()),s.querySelectorAll(".b_attribution").forEach(l=>l.remove()),s.querySelectorAll("script, style").forEach(l=>l.remove()),Array.from(s.querySelectorAll("*")).filter(l=>l.textContent?.trim()).map(l=>l.textContent?.trim()).filter(Boolean).reduce((l,p)=>(l.some(b=>b.includes(p)||p.includes(b))||l.push(p),l),[]).join(" ").trim().replace(/\s+/g," ")};try{r.querySelectorAll(".b_algo").forEach(s=>{let u=s.querySelector("h2"),c=s.querySelector("h2 
a")?.getAttribute("href"),g=a(s);if(!c||!n(c))return;let l={title:u?.textContent||"",snippet:g,url:c,content:""};!l.title||!l.url||e.push(l)})}catch(o){throw console.error("Error extracting search results from Bing:",o),o}return e}async waitForSearchResults(t,e){await t.waitForSelector("#b_results",{timeout:e??1e4})}};var P=class{getSearchUrl(t,e){let r=e.excludeDomains&&e.excludeDomains.length>0?e.excludeDomains.map(a=>`-site:${a}`).join(" "):"";return`https://www.baidu.com/s?${new URLSearchParams({wd:r?`${r} ${t}`:t,rn:`${e.count||10}`}).toString()}`}extractSearchResults(t){let e=[],r=t.document;try{r.querySelectorAll(".result").forEach(a=>{let o=a.querySelector(".t a"),s=o?.getAttribute("href"),u=a.querySelector(".c-span-last .content-right_2s-H4");if(!s)return;let h={title:o?.textContent||"",url:s,snippet:u?.textContent||"",content:""};!h.title||!h.url||e.push(h)})}catch(n){console.error("Error extracting search results from Baidu:",n)}return e}async waitForSearchResults(t,e){await t.waitForSelector("#page",{timeout:e??1e4})}};var C=class{getSearchUrl(t,e){let{count:r=10,excludeDomains:n=[]}=e,a=n&&n.length>0?n.map(s=>`-site:${s}`).join(" "):"";return`https://www.sogou.com/web?${new URLSearchParams({query:`${a?`${a} `:""}${t}`,num:`${r}`}).toString()}`}extractSearchResults(t){let e=[],r=t.document,n=s=>{try{return new URL(s),!0}catch{return!1}},a="https://www.sogou.com",o={results:".results .vrwrap",resultTitle:".vr-title",resultLink:".vr-title > a",resultSnippet:[".star-wiki",".fz-mid",".attribute-centent"],resultSnippetExcluded:[".text-lightgray",".zan-box",".tag-website"],related:"#main .vrwrap.middle-better-hintBox .hint-mid"};try{r.querySelectorAll(o.results).forEach(u=>{let h=u.querySelector(o.resultTitle),c=u.querySelector(o.resultLink)?.getAttribute("href"),l=o.resultSnippet.map(b=>{let d=u.cloneNode(!0);return o.resultSnippetExcluded.forEach(B=>{d.querySelector(B)?.remove()}),d.querySelector(b)?.textContent?.trim()||""}).filter(Boolean).join(" 
").replace(/\s+/g," ").trim();if(c?.includes("http")||(c=`${a}${c}`),!c?.trim()||!n(c))return;let p={title:h?.textContent?.trim()||"",url:c,snippet:l,content:""};!p.title||!p.url||e.push(p)})}catch(s){let u=s instanceof Error?s.message:String(s);throw console.error("Error extracting search results from Sogou:",u),s}return e}async waitForSearchResults(t,e){await t.waitForSelector("#pagebar_container",{timeout:e??1e4})}};var R=class{getSearchUrl(t,e){let r=new URLSearchParams({q:`${e.excludeDomains&&e.excludeDomains.length>0?`${e.excludeDomains.map(n=>`-site:${n}`).join(" ")} `:""}${t}`,num:`${e.count||10}`});return r.set("udm","14"),`https://www.google.com/search?${r.toString()}`}extractSearchResults(t){let e=[],r=t.document,n=o=>{try{return new URL(o),!0}catch{return!1}},a=o=>{let s=o.cloneNode(!0);return s.querySelectorAll("h3").forEach(l=>l.remove()),s.querySelectorAll("cite").forEach(l=>l.remove()),s.querySelectorAll("script, style").forEach(l=>l.remove()),Array.from(s.querySelectorAll("*")).filter(l=>l.textContent?.trim()).map(l=>l.textContent?.trim()).filter(Boolean).reduce((l,p)=>(l.some(b=>b.includes(p)||p.includes(b))||l.push(p),l),[]).join(" ").trim().replace(/\s+/g," ")};try{r.querySelectorAll(".tF2Cxc").forEach(s=>{let u=s.querySelector("h3"),c=s.querySelector("a")?.getAttribute("href"),g=a(s.parentElement||s);if(!c||!n(c))return;let l={title:u?.textContent||"",url:c,snippet:g,content:""};!l.title||!l.url||e.push(l)})}catch(o){console.error(o)}return e}async waitForSearchResults(t,e){await t.waitForSelector("#search",{timeout:e??1e4})}};function F(i){switch(i){case"bing":return new v;case"baidu":return new P;case"sogou":return new C;case"google":return new R;default:return new v}}var O=class{constructor(t={}){this.config=t;this.logger=t?.logger??le.defaultLogger,this.browser=t.browser??new x({logger:this.logger}),this.defaultEngine=t.defaultEngine??"bing"}logger;browser;isBrowserOpen=!1;defaultEngine;async perform(t){this.logger.info("Starting search 
with options:",t);let e=Array.isArray(t.query)?t.query:[t.query],r=t.excludeDomains||[],n=t.count&&Math.max(3,Math.floor(t.count/e.length)),a=t.engine||this.defaultEngine;try{this.isBrowserOpen?this.logger.info("Using existing browser instance"):(this.logger.info("Launching browser"),await this.browser.launch(this.config.browserOptions),this.isBrowserOpen=!0);let o=new L(t.concurrency||15),s=new Set,u=await Promise.all(e.map(h=>this.search(this.browser,{query:h,count:n,queue:o,visitedUrls:s,excludeDomains:r,truncate:t.truncate,needVisitedUrls:t.needVisitedUrls,engine:a})));return this.logger.success("Search completed successfully"),u.flat()}catch(o){return this.logger.error("Search failed:",o),[]}finally{!t.keepBrowserOpen&&this.isBrowserOpen&&await this.closeBrowser()}}async closeBrowser(){this.isBrowserOpen&&(this.logger.info("Closing browser"),await this.browser.close(),this.isBrowserOpen=!1)}async search(t,e){let r=F(e.engine),n=r.getSearchUrl(e.query,{count:e.count,excludeDomains:e.excludeDomains});this.logger.info(`Searching with ${e.engine} engine: ${n}`);let a=await t.evaluateOnNewPage({url:n,waitForOptions:{waitUntil:"networkidle2"},pageFunction:r.extractSearchResults,pageFunctionParams:[],beforePageLoad:async s=>{await $(s)},afterPageLoad:async s=>{r.waitForSearchResults&&await r.waitForSearchResults(s,1e4)}});return this.logger.info(`Fetched ${a?.length??0} links`),a=a?.filter(s=>e.visitedUrls.has(s.url)?!1:(e.visitedUrls.add(s.url),!se(s.url)))||[],a.length?(await Promise.allSettled(e.needVisitedUrls?a.map(s=>e.queue.add(()=>this.visitLink(this.browser,s))):a)).map(s=>s.status==="rejected"||!s.value?null:{...s.value,content:e.truncate?s.value.content.slice(0,e.truncate):s.value.content}).filter(s=>s!==null):(this.logger.info("No valid links found"),[])}async visitLink(t,e){try{this.logger.info("Visiting link:",e.url);let r=await t.evaluateOnNewPage({url:e.url,pageFunction:ae,pageFunctionParams:[ee],beforePageLoad:async n=>{await $(n)}});if(r){let 
n=oe(r.content);return{...r,url:e.url,content:n,snippet:e.snippet}}}catch(r){this.logger.error("Failed to visit link:",r)}}};var ue=require("@agent-infra/logger"),ce=new ue.ConsoleLogger("[LocalSearch]");async function he(i){let{query:t,limit:e=10}=i,{engines:r="all"}=i,n=new O({logger:ce,browserOptions:{headless:!0}});r==="all"&&(r="bing,google,baidu,sogou");try{let a=r.split(",");if(a.length===0)throw new Error("engines is required");let o=[];for(let s of a){let u=await n.perform({query:t,count:e,engine:s,needVisitedUrls:!1});if(u.length>0){o.push(...u);break}}return ce.info(`Found ${o.length} results for ${t}`,o),{results:o,success:!0}}catch(a){let o=a instanceof Error?a.message:"Local search error.";throw process.stdout.write(o),a}finally{await n.closeBrowser()}}var ge={name:"one_search",description:"Search and retrieve content from web pages. Returns SERP results by default (url, title, description).",inputSchema:{type:"object",properties:{query:{type:"string",description:"Search query string"},limit:{type:"number",description:"Maximum number of results to return (default: 10)"},language:{type:"string",description:"Language code for search results (default: auto)"},categories:{type:"string",enum:["general","news","images","videos","it","science","map","music","files","social_media"],description:"Categories to search for (default: general)"},timeRange:{type:"string",description:"Time range for search results (default: all)",enum:["all","day","week","month","year"]}},required:["query"]}},pe={name:"one_map",description:"Discover URLs from a starting point. 
Can use both sitemap.xml and HTML link discovery.",inputSchema:{type:"object",properties:{url:{type:"string",description:"Starting URL for URL discovery"},search:{type:"string",description:"Optional search term to filter URLs"},ignoreSitemap:{type:"boolean",description:"Skip sitemap.xml discovery and only use HTML links"},sitemapOnly:{type:"boolean",description:"Only use sitemap.xml for discovery, ignore HTML links"},includeSubdomains:{type:"boolean",description:"Include URLs from subdomains in results"},limit:{type:"number",description:"Maximum number of URLs to return"}},required:["url"]}},de={name:"one_scrape",description:"Scrape a single webpage with advanced options for content extraction. Supports various formats including markdown, HTML, and screenshots. Can execute custom actions like clicking or scrolling before scraping.",inputSchema:{type:"object",properties:{url:{type:"string",description:"The URL to scrape"},formats:{type:"array",items:{type:"string",enum:["markdown","html","rawHtml","screenshot","links","screenshot@fullPage","extract"]},description:"Content formats to extract (default: ['markdown'])"},onlyMainContent:{type:"boolean",description:"Extract only the main content, filtering out navigation, footers, etc."},includeTags:{type:"array",items:{type:"string"},description:"HTML tags to specifically include in extraction"},excludeTags:{type:"array",items:{type:"string"},description:"HTML tags to exclude from extraction"},waitFor:{type:"number",description:"Time in milliseconds to wait for dynamic content to load"},timeout:{type:"number",description:"Maximum time in milliseconds to wait for the page to load"},actions:{type:"array",items:{type:"object",properties:{type:{type:"string",enum:["wait","click","screenshot","write","press","scroll","scrape","executeJavascript"],description:"Type of action to perform"},selector:{type:"string",description:"CSS selector for the target element"},milliseconds:{type:"number",description:"Time to wait in 
milliseconds (for wait action)"},text:{type:"string",description:"Text to write (for write action)"},key:{type:"string",description:"Key to press (for press action)"},direction:{type:"string",enum:["up","down"],description:"Scroll direction"},script:{type:"string",description:"JavaScript code to execute"},fullPage:{type:"boolean",description:"Take full page screenshot"}},required:["type"]},description:"List of actions to perform before scraping"},extract:{type:"object",properties:{schema:{type:"object",description:"Schema for structured data extraction"},systemPrompt:{type:"string",description:"System prompt for LLM extraction"},prompt:{type:"string",description:"User prompt for LLM extraction"}},description:"Configuration for structured data extraction"},mobile:{type:"boolean",description:"Use mobile viewport"},skipTlsVerification:{type:"boolean",description:"Skip TLS certificate verification"},removeBase64Images:{type:"boolean",description:"Remove base64 encoded images from output"},location:{type:"object",properties:{country:{type:"string",description:"Country code for geolocation"},languages:{type:"array",items:{type:"string"},description:"Language codes for content"}},description:"Location settings for scraping"}},required:["url"]}},me={name:"one_extract",description:"Extract structured information from web pages using LLM. 
Supports both cloud AI and self-hosted LLM extraction.",inputSchema:{type:"object",properties:{urls:{type:"array",items:{type:"string"},description:"List of URLs to extract information from"},prompt:{type:"string",description:"Prompt for the LLM extraction"},systemPrompt:{type:"string",description:"System prompt for LLM extraction"},schema:{type:"object",description:"JSON schema for structured data extraction"},allowExternalLinks:{type:"boolean",description:"Allow extraction from external links"},enableWebSearch:{type:"boolean",description:"Enable web search for additional context"},includeSubdomains:{type:"boolean",description:"Include subdomains in extraction"}},required:["urls"]}};var _e=m(require("@mendable/firecrawl-js"),1),Se=m(require("@dotenvx/dotenvx"),1),I=require("duck-duck-scrape");Se.default.config();var ke=process.env.SEARCH_API_URL,T=process.env.SEARCH_API_KEY,fe=process.env.SEARCH_PROVIDER??"local",Ue=process.env.SAFE_SEARCH??0,Me=process.env.LIMIT??10,$e=process.env.CATEGORIES??"general",Fe=process.env.ENGINES??"all",Ge=process.env.FORMAT??"json",qe=process.env.LANGUAGE??"auto",He=process.env.TIME_RANGE??"",je=process.env.TIMEOUT??1e4,We=process.env.FIRECRAWL_API_KEY,ye=process.env.FIRECRAWL_API_URL,Ee=new _e.default({apiKey:We??"",...ye?{apiUrl:ye}:{}}),f=new be.Server({name:"one-search-mcp",version:"0.0.1"},{capabilities:{tools:{},logging:{}}}),w={limit:Number(Me),categories:$e,format:Ge,safesearch:Ue,language:qe,engines:Fe,time_range:He,timeout:je};f.setRequestHandler(D.ListToolsRequestSchema,async()=>({tools:[ge,me,de,pe]}));f.setRequestHandler(D.CallToolRequestSchema,async i=>{let t=Date.now();try{let{name:e,arguments:r}=i.params;if(!r)throw new Error("No arguments provided");switch(f.sendLoggingMessage({level:"info",data:`[${new Date().toISOString()}] Received request for tool: [${e}]`}),e){case"one_search":{if(!Ye(r))throw new Error(`Invalid arguments for tool: [${e}]`);try{let{results:n,success:a}=await 
Ve({...r,apiKey:T??"",apiUrl:ke});if(!a)throw new Error("Failed to search");return{content:[{type:"text",text:n.map(s=>`Title: ${s.title}
|
|
4
|
+
URL: ${s.url}
|
|
5
|
+
Description: ${s.snippet}
|
|
6
|
+
${s.markdown?`Content: ${s.markdown}`:""}`).join(`
|
|
7
|
+
|
|
8
|
+
`)}],results:n,success:a}}catch(n){return f.sendLoggingMessage({level:"error",data:`[${new Date().toISOString()}] Error searching: ${n}`}),{success:!1,content:[{type:"text",text:n instanceof Error?n.message:"Unknown error"}]}}}case"one_scrape":{if(!ze(r))throw new Error(`Invalid arguments for tool: [${e}]`);try{let n=Date.now();f.sendLoggingMessage({level:"info",data:`[${new Date().toISOString()}] Scraping started for url: [${r.url}]`});let{url:a,...o}=r,{content:s,success:u,result:h}=await Xe(a,o);return f.sendLoggingMessage({level:"info",data:`[${new Date().toISOString()}] Scraping completed in ${Date.now()-n}ms`}),{content:s,result:h,success:u}}catch(n){return f.sendLoggingMessage({level:"error",data:`[${new Date().toISOString()}] Error scraping: ${n}`}),{success:!1,content:[{type:"text",text:n instanceof Error?n.message:"Unknown error"}]}}}case"one_map":{if(!Je(r))throw new Error(`Invalid arguments for tool: [${e}]`);try{let{content:n,success:a,result:o}=await Ke(r.url,r);return{content:n,result:o,success:a}}catch(n){return f.sendLoggingMessage({level:"error",data:`[${new Date().toISOString()}] Error mapping: ${n}`}),{success:!1,content:[{type:"text",text:n instanceof Error?n.message:String(n)}]}}}default:throw new Error(`Unknown tool: ${e}`)}}catch(e){let r=e instanceof Error?e.message:String(e);return f.sendLoggingMessage({level:"error",data:{message:`[${new Date().toISOString()}] Error processing request: ${r}`,tool:i.params.name,arguments:i.params.arguments,timestamp:new Date().toISOString(),duration:Date.now()-t}}),{success:!1,content:[{type:"text",text:r}]}}finally{f.sendLoggingMessage({level:"info",data:`[${new Date().toISOString()}] Request completed in ${Date.now()-t}ms`})}});async function Ve(i){switch(fe){case"searxng":{let t={...w,...i,apiKey:T},{categories:e,language:r}=w;return e&&(t.categories=e),r&&(t.language=r),await X(t)}case"tavily":return await Y({...w,...i,apiKey:T});case"bing":return await H({...w,...i,apiKey:T});case"duckduckgo":{let 
t=i.safeSearch??0,e=[I.SafeSearchType.STRICT,I.SafeSearchType.MODERATE,I.SafeSearchType.OFF];return await W({...w,...i,apiKey:T,safeSearch:e[t]})}case"local":return await he({...w,...i});default:throw new Error(`Unsupported search provider: ${fe}`)}}async function Xe(i,t){let e=await Ee.scrapeUrl(i,{...t});if(!e.success)throw new Error(`Failed to scrape: ${e.error}`);let r=[];return e.markdown&&r.push(e.markdown),e.rawHtml&&r.push(e.rawHtml),e.links&&r.push(e.links.join(`
|
|
9
|
+
`)),e.screenshot&&r.push(e.screenshot),e.html&&r.push(e.html),e.extract&&r.push(e.extract),{content:[{type:"text",text:r.join(`
|
|
10
|
+
|
|
11
|
+
`)||"No content found"}],result:e,success:!0}}async function Ke(i,t){let e=await Ee.mapUrl(i,{...t});if("error"in e)throw new Error(`Failed to map: ${e.error}`);if(!e.links)throw new Error(`No links found from: ${i}`);return{content:[{type:"text",text:e.links.join(`
|
|
12
|
+
`).trim()}],result:e.links,success:!0}}function Ye(i){return typeof i=="object"&&i!==null&&"query"in i&&typeof i.query=="string"}function ze(i){return typeof i=="object"&&i!==null&&"url"in i&&typeof i.url=="string"}function Je(i){return typeof i=="object"&&i!==null&&"url"in i&&typeof i.url=="string"}async function Qe(){try{process.stdout.write(`Starting OneSearch MCP server...
|
|
13
|
+
`);let i=new we.StdioServerTransport;await f.connect(i),f.sendLoggingMessage({level:"info",data:"OneSearch MCP server started"})}catch(i){let t=i instanceof Error?i.message:String(i);process.stderr.write(`Error starting server: ${t}
|
|
14
|
+
`),process.exit(1)}}Qe().catch(i=>{let t=i instanceof Error?i.message:String(i);process.stderr.write(`Error running server: ${t}
|
|
15
|
+
`),process.exit(1)});
|
|
1952
16
|
//# sourceMappingURL=index.cjs.map
|