@dyyz1993/agent-browser 0.26.2 → 0.26.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/bin/agent-browser.js +7 -14
- package/dist/actions/crawl.d.ts +1 -1
- package/dist/actions/crawl.d.ts.map +1 -1
- package/dist/actions/crawl.js +270 -33
- package/dist/actions/crawl.js.map +1 -1
- package/dist/actions/map.d.ts.map +1 -1
- package/dist/actions/map.js +111 -23
- package/dist/actions/map.js.map +1 -1
- package/dist/actions/scrape.d.ts +19 -0
- package/dist/actions/scrape.d.ts.map +1 -1
- package/dist/actions/scrape.js +79 -3
- package/dist/actions/scrape.js.map +1 -1
- package/dist/actions/search.d.ts.map +1 -1
- package/dist/actions/search.js +77 -20
- package/dist/actions/search.js.map +1 -1
- package/dist/actions/utils.d.ts.map +1 -1
- package/dist/actions/utils.js +124 -54
- package/dist/actions/utils.js.map +1 -1
- package/dist/cli/commands.d.ts.map +1 -1
- package/dist/cli/commands.js +82 -0
- package/dist/cli/commands.js.map +1 -1
- package/dist/cli/help.d.ts.map +1 -1
- package/dist/cli/help.js +66 -10
- package/dist/cli/help.js.map +1 -1
- package/dist/protocol.d.ts.map +1 -1
- package/dist/protocol.js +29 -0
- package/dist/protocol.js.map +1 -1
- package/dist/types.d.ts +25 -0
- package/dist/types.d.ts.map +1 -1
- package/package.json +4 -1
- package/scripts/postinstall.js +38 -39
- package/bin/agent-browser-darwin-arm64 +0 -0
package/dist/actions/map.js
CHANGED
|
@@ -1,34 +1,93 @@
|
|
|
1
1
|
import { successResponse } from '../protocol.js';
|
|
2
2
|
import { discoverLinks } from './crawl.js';
|
|
3
|
-
|
|
3
|
+
function parseSitemapXml(xml) {
|
|
4
4
|
const urls = [];
|
|
5
|
+
const urlMatches = xml.matchAll(/<loc>([^<]+)<\/loc>/g);
|
|
6
|
+
for (const match of urlMatches) {
|
|
7
|
+
urls.push(match[1].trim());
|
|
8
|
+
}
|
|
9
|
+
return urls;
|
|
10
|
+
}
|
|
11
|
+
function parseSitemapIndex(xml) {
|
|
12
|
+
const sitemapUrls = [];
|
|
13
|
+
const locRegex = /<sitemap[^>]*>[\s\S]*?<loc>([^<]+)<\/loc>[\s\S]*?<\/sitemap>/gi;
|
|
14
|
+
let match;
|
|
15
|
+
while ((match = locRegex.exec(xml)) !== null) {
|
|
16
|
+
sitemapUrls.push(match[1].trim());
|
|
17
|
+
}
|
|
18
|
+
return sitemapUrls;
|
|
19
|
+
}
|
|
20
|
+
async function discoverSitemapsFromRobots(origin) {
|
|
5
21
|
try {
|
|
6
|
-
const
|
|
7
|
-
const sitemapUrl = `${base.origin}/sitemap.xml`;
|
|
8
|
-
const response = await fetch(sitemapUrl, {
|
|
22
|
+
const res = await fetch(`${origin}/robots.txt`, {
|
|
9
23
|
signal: AbortSignal.timeout(5000),
|
|
10
24
|
});
|
|
11
|
-
if (
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
25
|
+
if (!res.ok)
|
|
26
|
+
return [];
|
|
27
|
+
const text = await res.text();
|
|
28
|
+
return text
|
|
29
|
+
.split('\n')
|
|
30
|
+
.filter((l) => l.toLowerCase().startsWith('sitemap:'))
|
|
31
|
+
.map((l) => l.split(':').slice(1).join(':').trim());
|
|
32
|
+
}
|
|
33
|
+
catch {
|
|
34
|
+
return [];
|
|
35
|
+
}
|
|
36
|
+
}
|
|
37
|
+
async function fetchSitemapUrls(baseUrl) {
|
|
38
|
+
const allUrls = [];
|
|
39
|
+
const base = new URL(baseUrl);
|
|
40
|
+
const robotsSitemaps = await discoverSitemapsFromRobots(base.origin);
|
|
41
|
+
const sitemapUrls = [
|
|
42
|
+
...robotsSitemaps,
|
|
43
|
+
new URL('/sitemap.xml', baseUrl).href,
|
|
44
|
+
new URL('/sitemap_index.xml', baseUrl).href,
|
|
45
|
+
new URL('/sitemap/', baseUrl).href,
|
|
46
|
+
];
|
|
47
|
+
const tried = new Set();
|
|
48
|
+
for (const sitemapUrl of sitemapUrls) {
|
|
49
|
+
if (tried.has(sitemapUrl))
|
|
50
|
+
continue;
|
|
51
|
+
tried.add(sitemapUrl);
|
|
52
|
+
try {
|
|
53
|
+
const res = await fetch(sitemapUrl, { signal: AbortSignal.timeout(10000) });
|
|
54
|
+
if (!res.ok)
|
|
55
|
+
continue;
|
|
56
|
+
const xml = await res.text();
|
|
57
|
+
if (xml.includes('<sitemapindex')) {
|
|
58
|
+
const childUrls = parseSitemapIndex(xml);
|
|
59
|
+
for (const childUrl of childUrls) {
|
|
60
|
+
try {
|
|
61
|
+
const childRes = await fetch(childUrl, { signal: AbortSignal.timeout(10000) });
|
|
62
|
+
if (childRes.ok) {
|
|
63
|
+
const childXml = await childRes.text();
|
|
64
|
+
allUrls.push(...parseSitemapXml(childXml));
|
|
65
|
+
}
|
|
66
|
+
}
|
|
67
|
+
catch {
|
|
68
|
+
// child sitemap fetch failed
|
|
20
69
|
}
|
|
21
70
|
}
|
|
22
|
-
catch {
|
|
23
|
-
// invalid URL
|
|
24
|
-
}
|
|
25
71
|
}
|
|
72
|
+
else {
|
|
73
|
+
allUrls.push(...parseSitemapXml(xml));
|
|
74
|
+
}
|
|
75
|
+
if (allUrls.length > 0)
|
|
76
|
+
break;
|
|
77
|
+
}
|
|
78
|
+
catch {
|
|
79
|
+
// sitemap not available
|
|
26
80
|
}
|
|
27
81
|
}
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
82
|
+
return allUrls.filter((url) => {
|
|
83
|
+
try {
|
|
84
|
+
const parsedUrl = new URL(url);
|
|
85
|
+
return parsedUrl.hostname === base.hostname;
|
|
86
|
+
}
|
|
87
|
+
catch {
|
|
88
|
+
return false;
|
|
89
|
+
}
|
|
90
|
+
});
|
|
32
91
|
}
|
|
33
92
|
export async function handleMap(command, browser) {
|
|
34
93
|
if (!browser.isLaunched()) {
|
|
@@ -43,7 +102,7 @@ export async function handleMap(command, browser) {
|
|
|
43
102
|
const baseUrl = command.url;
|
|
44
103
|
const baseOrigin = new URL(baseUrl).origin;
|
|
45
104
|
const baseHostname = new URL(baseUrl).hostname.replace(/^www\./, '');
|
|
46
|
-
const sitemapUrls = await
|
|
105
|
+
const sitemapUrls = await fetchSitemapUrls(baseUrl);
|
|
47
106
|
const page = browser.getPage();
|
|
48
107
|
await page.goto(baseUrl, {
|
|
49
108
|
timeout: timeout * 1000,
|
|
@@ -70,10 +129,39 @@ export async function handleMap(command, browser) {
|
|
|
70
129
|
}
|
|
71
130
|
}
|
|
72
131
|
const urls = Array.from(allUrls).slice(0, limit);
|
|
132
|
+
const filtered = command.excludePatterns || command.includePatterns
|
|
133
|
+
? urls.filter((url) => {
|
|
134
|
+
if (command.excludePatterns?.length) {
|
|
135
|
+
for (const pattern of command.excludePatterns) {
|
|
136
|
+
if (globMatch(url, pattern))
|
|
137
|
+
return false;
|
|
138
|
+
}
|
|
139
|
+
}
|
|
140
|
+
if (command.includePatterns?.length) {
|
|
141
|
+
for (const pattern of command.includePatterns) {
|
|
142
|
+
if (globMatch(url, pattern))
|
|
143
|
+
return true;
|
|
144
|
+
}
|
|
145
|
+
return false;
|
|
146
|
+
}
|
|
147
|
+
return true;
|
|
148
|
+
})
|
|
149
|
+
: urls;
|
|
73
150
|
return successResponse(command.id, {
|
|
74
151
|
url: baseUrl,
|
|
75
|
-
urls,
|
|
76
|
-
total:
|
|
152
|
+
urls: filtered,
|
|
153
|
+
total: filtered.length,
|
|
77
154
|
});
|
|
78
155
|
}
|
|
156
|
+
function globMatch(url, pattern) {
|
|
157
|
+
const regex = new RegExp('^' +
|
|
158
|
+
pattern
|
|
159
|
+
.replace(/[.+^${}()|[\]\\]/g, '\\$&')
|
|
160
|
+
.replace(/\*\*/g, '<<<GLOBSTAR>>>')
|
|
161
|
+
.replace(/\*/g, '[^/]*')
|
|
162
|
+
.replace(/<<<GLOBSTAR>>>/g, '.*')
|
|
163
|
+
.replace(/\?/g, '[^/]') +
|
|
164
|
+
'$');
|
|
165
|
+
return regex.test(url);
|
|
166
|
+
}
|
|
79
167
|
//# sourceMappingURL=map.js.map
|
package/dist/actions/map.js.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"map.js","sourceRoot":"","sources":["../../src/actions/map.ts"],"names":[],"mappings":"AAEA,OAAO,EAAE,eAAe,EAAE,MAAM,gBAAgB,CAAC;AACjD,OAAO,EAAE,aAAa,EAAE,MAAM,YAAY,CAAC;AAE3C,
|
|
1
|
+
{"version":3,"file":"map.js","sourceRoot":"","sources":["../../src/actions/map.ts"],"names":[],"mappings":"AAEA,OAAO,EAAE,eAAe,EAAE,MAAM,gBAAgB,CAAC;AACjD,OAAO,EAAE,aAAa,EAAE,MAAM,YAAY,CAAC;AAE3C,SAAS,eAAe,CAAC,GAAW;IAClC,MAAM,IAAI,GAAa,EAAE,CAAC;IAC1B,MAAM,UAAU,GAAG,GAAG,CAAC,QAAQ,CAAC,sBAAsB,CAAC,CAAC;IACxD,KAAK,MAAM,KAAK,IAAI,UAAU,EAAE,CAAC;QAC/B,IAAI,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,IAAI,EAAE,CAAC,CAAC;IAC7B,CAAC;IACD,OAAO,IAAI,CAAC;AACd,CAAC;AAED,SAAS,iBAAiB,CAAC,GAAW;IACpC,MAAM,WAAW,GAAa,EAAE,CAAC;IACjC,MAAM,QAAQ,GAAG,gEAAgE,CAAC;IAClF,IAAI,KAAK,CAAC;IACV,OAAO,CAAC,KAAK,GAAG,QAAQ,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,KAAK,IAAI,EAAE,CAAC;QAC7C,WAAW,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,IAAI,EAAE,CAAC,CAAC;IACpC,CAAC;IACD,OAAO,WAAW,CAAC;AACrB,CAAC;AAED,KAAK,UAAU,0BAA0B,CAAC,MAAc;IACtD,IAAI,CAAC;QACH,MAAM,GAAG,GAAG,MAAM,KAAK,CAAC,GAAG,MAAM,aAAa,EAAE;YAC9C,MAAM,EAAE,WAAW,CAAC,OAAO,CAAC,IAAI,CAAC;SAClC,CAAC,CAAC;QACH,IAAI,CAAC,GAAG,CAAC,EAAE;YAAE,OAAO,EAAE,CAAC;QACvB,MAAM,IAAI,GAAG,MAAM,GAAG,CAAC,IAAI,EAAE,CAAC;QAC9B,OAAO,IAAI;aACR,KAAK,CAAC,IAAI,CAAC;aACX,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,WAAW,EAAE,CAAC,UAAU,CAAC,UAAU,CAAC,CAAC;aACrD,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,IAAI,EAAE,CAAC,CAAC;IACxD,CAAC;IAAC,MAAM,CAAC;QACP,OAAO,EAAE,CAAC;IACZ,CAAC;AACH,CAAC;AAED,KAAK,UAAU,gBAAgB,CAAC,OAAe;IAC7C,MAAM,OAAO,GAAa,EAAE,CAAC;IAC7B,MAAM,IAAI,GAAG,IAAI,GAAG,CAAC,OAAO,CAAC,CAAC;IAE9B,MAAM,cAAc,GAAG,MAAM,0BAA0B,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC;IAErE,MAAM,WAAW,GAAG;QAClB,GAAG,cAAc;QACjB,IAAI,GAAG,CAAC,cAAc,EAAE,OAAO,CAAC,CAAC,IAAI;QACrC,IAAI,GAAG,CAAC,oBAAoB,EAAE,OAAO,CAAC,CAAC,IAAI;QAC3C,IAAI,GAAG,CAAC,WAAW,EAAE,OAAO,CAAC,CAAC,IAAI;KACnC,CAAC;IAEF,MAAM,KAAK,GAAG,IAAI,GAAG,EAAU,CAAC;IAChC,KAAK,MAAM,UAAU,IAAI,WAAW,EAAE,CAAC;QACrC,IAAI,KAAK,CAAC,GAAG,CAAC,UAAU,CAAC;YAAE,SAAS;QACpC,KAAK,CAAC,GAAG,CAAC,UAAU,CAAC,CAAC;QACtB,IAAI,CAAC;YACH,MAAM,GAAG,GAAG,MAAM,KAAK,CAAC,UAAU,EAAE,EAAE,MAAM,EAAE,WAAW,CAAC,OAAO,CAAC,KAAK,CAAC,EAAE,CAAC,CAAC;YAC5E,IAAI,CAAC,GAAG,CAAC,EAAE;gBAAE,SAAS;YACtB,MAAM,GAAG,GAAG,MAAM,GAAG,CAAC,IAAI,EAAE,CAAC;YAE7B,IAAI,GAAG,CAAC,QAAQ,CAAC,eAAe,CAAC,EAAE,CAAC;gBAClC,MAAM,SAAS,GAAG,iBAAiB,CAAC,GAAG,CAAC,CAAC;gBACzC,KAAK,MAAM,QAAQ,IAAI,SAAS,EAAE,CAAC;oBACjC,IAAI,CAAC;wBACH,MAAM,QAAQ,GAAG,MAAM,KAAK,CAAC,QAAQ,EAAE,EAAE,MAAM,EAAE,WAAW,CAAC,OAAO,CAAC,KAAK,CAAC,EAAE,CAAC,CAAC;wBAC/E,IAAI,QAAQ,CAAC,EAAE,EAAE,CAAC;4BAChB,MAAM,QAAQ,GAAG,MAAM,QAAQ,CAAC,IAAI,EAAE,CAAC;4BACvC,OAAO,CAAC,IAAI,CAAC,GAAG,eAAe,CAAC,QAAQ,CAAC,CAAC,CAAC;wBAC7C,CAAC;oBACH,CAAC;oBAAC,MAAM,CAAC;wBACP,6BAA6B;oBAC/B,CAAC;gBACH,CAAC;YACH,CAAC;iBAAM,CAAC;gBACN,OAAO,CAAC,IAAI,CAAC,GAAG,eAAe,CAAC,GAAG,CAAC,CAAC,CAAC;YACxC,CAAC;YAED,IAAI,OAAO,CAAC,MAAM,GAAG,CAAC;gBAAE,MAAM;QAChC,CAAC;QAAC,MAAM,CAAC;YACP,wBAAwB;QAC1B,CAAC;IACH,CAAC;IAED,OAAO,OAAO,CAAC,MAAM,CAAC,CAAC,GAAG,EAAE,EAAE;QAC5B,IAAI,CAAC;YACH,MAAM,SAAS,GAAG,IAAI,GAAG,CAAC,GAAG,CAAC,CAAC;YAC/B,OAAO,SAAS,CAAC,QAAQ,KAAK,IAAI,CAAC,QAAQ,CAAC;QAC9C,CAAC;QAAC,MAAM,CAAC;YACP,OAAO,KAAK,CAAC;QACf,CAAC;IACH,CAAC,CAAC,CAAC;AACL,CAAC;AAED,MAAM,CAAC,KAAK,UAAU,SAAS,CAC7B,OAAmB,EACnB,OAAuB;IAEvB,IAAI,CAAC,OAAO,CAAC,UAAU,EAAE,EAAE,CAAC;QAC1B,MAAM,OAAO,CAAC,MAAM,CAAC;YACnB,EAAE,EAAE,MAAM;YACV,MAAM,EAAE,QAAQ;YAChB,QAAQ,EAAE,OAAO,CAAC,QAAQ,IAAI,IAAI;SACnC,CAAC,CAAC;IACL,CAAC;IAED,MAAM,KAAK,GAAG,OAAO,CAAC,KAAK,IAAI,GAAG,CAAC;IACnC,MAAM,OAAO,GAAG,OAAO,CAAC,OAAO,IAAI,EAAE,CAAC;IACtC,MAAM,OAAO,GAAG,OAAO,CAAC,GAAG,CAAC;IAC5B,MAAM,UAAU,GAAG,IAAI,GAAG,CAAC,OAAO,CAAC,CAAC,MAAM,CAAC;IAC3C,MAAM,YAAY,GAAG,IAAI,GAAG,CAAC,OAAO,CAAC,CAAC,QAAQ,CAAC,OAAO,CAAC,QAAQ,EAAE,EAAE,CAAC,CAAC;IAErE,MAAM,WAAW,GAAG,MAAM,gBAAgB,CAAC,OAAO,CAAC,CAAC;IAEpD,MAAM,IAAI,GAAG,OAAO,CAAC,OAAO,EAAE,CAAC;IAC/B,MAAM,IAAI,CAAC,IAAI,CAAC,OAAO,EAAE;QACvB,OAAO,EAAE,OAAO,GAAG,IAAI;QACvB,SAAS,EAAE,kBAAkB;KAC9B,CAAC,CAAC;IAEH,MAAM,OAAO,CAAC,IAAI,CAAC;QACjB,IAAI;aACD,gBAAgB,CAAC,aAAa,EAAE,EAAE,OAAO,EAAE,IAAI,CAAC,GAAG,CAAC,OAAO,GAAG,IAAI,EAAE,IAAI,CAAC,EAAE,CAAC;aAC5E,KAAK,CAAC,GAAG,EAAE,GAAE,CAAC,CAAC;QAClB,IAAI,CAAC,cAAc,CAAC,IAAI,CAAC;KAC1B,CAAC,CAAC;IAEH,MAAM,QAAQ,GAAG,MAAM,aAAa,CAAC,IAAI,EAAE,UAAU,EAAE,YAAY,EAAE,EAAE,CAAC,CAAC;IAEzE,MAAM,OAAO,GAAG,IAAI,GAAG,EAAU,CAAC;IAElC,KAAK,MAAM,GAAG,IAAI,CAAC,GAAG,WAAW,EAAE,GAAG,QAAQ,CAAC,EAAE,CAAC;QAChD,IAAI,CAAC;YACH,MAAM,UAAU,GAAG,IAAI,GAAG,CAAC,GAAG,EAAE,OAAO,CAAC,CAAC,IAAI,CAAC;YAC9C,MAAM,MAAM,GAAG,IAAI,GAAG,CAAC,UAAU,CAAC,CAAC;YACnC,IAAI,MAAM,CAAC,MAAM,KAAK,UAAU,EAAE,CAAC;gBACjC,OAAO,CAAC,GAAG,CAAC,UAAU,CAAC,CAAC;YAC1B,CAAC;QACH,CAAC;QAAC,MAAM,CAAC;YACP,cAAc;QAChB,CAAC;IACH,CAAC;IAED,MAAM,IAAI,GAAG,KAAK,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC,KAAK,CAAC,CAAC,EAAE,KAAK,CAAC,CAAC;IAEjD,MAAM,QAAQ,GACZ,OAAO,CAAC,eAAe,IAAI,OAAO,CAAC,eAAe;QAChD,CAAC,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC,GAAG,EAAE,EAAE;YAClB,IAAI,OAAO,CAAC,eAAe,EAAE,MAAM,EAAE,CAAC;gBACpC,KAAK,MAAM,OAAO,IAAI,OAAO,CAAC,eAAe,EAAE,CAAC;oBAC9C,IAAI,SAAS,CAAC,GAAG,EAAE,OAAO,CAAC;wBAAE,OAAO,KAAK,CAAC;gBAC5C,CAAC;YACH,CAAC;YACD,IAAI,OAAO,CAAC,eAAe,EAAE,MAAM,EAAE,CAAC;gBACpC,KAAK,MAAM,OAAO,IAAI,OAAO,CAAC,eAAe,EAAE,CAAC;oBAC9C,IAAI,SAAS,CAAC,GAAG,EAAE,OAAO,CAAC;wBAAE,OAAO,IAAI,CAAC;gBAC3C,CAAC;gBACD,OAAO,KAAK,CAAC;YACf,CAAC;YACD,OAAO,IAAI,CAAC;QACd,CAAC,CAAC;QACJ,CAAC,CAAC,IAAI,CAAC;IAEX,OAAO,eAAe,CAAC,OAAO,CAAC,EAAE,EAAE;QACjC,GAAG,EAAE,OAAO;QACZ,IAAI,EAAE,QAAQ;QACd,KAAK,EAAE,QAAQ,CAAC,MAAM;KACvB,CAAC,CAAC;AACL,CAAC;AAED,SAAS,SAAS,CAAC,GAAW,EAAE,OAAe;IAC7C,MAAM,KAAK,GAAG,IAAI,MAAM,CACtB,GAAG;QACD,OAAO;aACJ,OAAO,CAAC,mBAAmB,EAAE,MAAM,CAAC;aACpC,OAAO,CAAC,OAAO,EAAE,gBAAgB,CAAC;aAClC,OAAO,CAAC,KAAK,EAAE,OAAO,CAAC;aACvB,OAAO,CAAC,iBAAiB,EAAE,IAAI,CAAC;aAChC,OAAO,CAAC,KAAK,EAAE,MAAM,CAAC;QACzB,GAAG,CACN,CAAC;IACF,OAAO,KAAK,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;AACzB,CAAC"}
|
package/dist/actions/scrape.d.ts
CHANGED
|
@@ -1,10 +1,29 @@
|
|
|
1
1
|
import type { BrowserManager } from '../browser/index.js';
|
|
2
2
|
import type { ScrapeCommand, Response } from '../types.js';
|
|
3
|
+
export interface ScrapeMetadata {
|
|
4
|
+
description?: string;
|
|
5
|
+
keywords?: string;
|
|
6
|
+
author?: string;
|
|
7
|
+
robots?: string;
|
|
8
|
+
canonical?: string;
|
|
9
|
+
favicon?: string;
|
|
10
|
+
lang?: string;
|
|
11
|
+
ogTitle?: string;
|
|
12
|
+
ogDescription?: string;
|
|
13
|
+
ogImage?: string;
|
|
14
|
+
ogUrl?: string;
|
|
15
|
+
ogSiteName?: string;
|
|
16
|
+
publishedTime?: string;
|
|
17
|
+
modifiedTime?: string;
|
|
18
|
+
articleTag?: string;
|
|
19
|
+
articleSection?: string;
|
|
20
|
+
}
|
|
3
21
|
export interface ScrapeResult {
|
|
4
22
|
url: string;
|
|
5
23
|
title: string;
|
|
6
24
|
content: string;
|
|
7
25
|
format: 'text' | 'html' | 'markdown';
|
|
26
|
+
metadata?: ScrapeMetadata;
|
|
8
27
|
}
|
|
9
28
|
export declare function handleScrape(command: ScrapeCommand, browser: BrowserManager): Promise<Response<ScrapeResult>>;
|
|
10
29
|
//# sourceMappingURL=scrape.d.ts.map
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"scrape.d.ts","sourceRoot":"","sources":["../../src/actions/scrape.ts"],"names":[],"mappings":"
|
|
1
|
+
{"version":3,"file":"scrape.d.ts","sourceRoot":"","sources":["../../src/actions/scrape.ts"],"names":[],"mappings":"AACA,OAAO,KAAK,EAAE,cAAc,EAAE,MAAM,qBAAqB,CAAC;AAC1D,OAAO,KAAK,EAAE,aAAa,EAAE,QAAQ,EAAE,MAAM,aAAa,CAAC;AAI3D,MAAM,WAAW,cAAc;IAC7B,WAAW,CAAC,EAAE,MAAM,CAAC;IACrB,QAAQ,CAAC,EAAE,MAAM,CAAC;IAClB,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,IAAI,CAAC,EAAE,MAAM,CAAC;IACd,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,aAAa,CAAC,EAAE,MAAM,CAAC;IACvB,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,UAAU,CAAC,EAAE,MAAM,CAAC;IACpB,aAAa,CAAC,EAAE,MAAM,CAAC;IACvB,YAAY,CAAC,EAAE,MAAM,CAAC;IACtB,UAAU,CAAC,EAAE,MAAM,CAAC;IACpB,cAAc,CAAC,EAAE,MAAM,CAAC;CACzB;AAED,MAAM,WAAW,YAAY;IAC3B,GAAG,EAAE,MAAM,CAAC;IACZ,KAAK,EAAE,MAAM,CAAC;IACd,OAAO,EAAE,MAAM,CAAC;IAChB,MAAM,EAAE,MAAM,GAAG,MAAM,GAAG,UAAU,CAAC;IACrC,QAAQ,CAAC,EAAE,cAAc,CAAC;CAC3B;AAED,wBAAsB,YAAY,CAChC,OAAO,EAAE,aAAa,EACtB,OAAO,EAAE,cAAc,GACtB,OAAO,CAAC,QAAQ,CAAC,YAAY,CAAC,CAAC,CAgIjC"}
|
package/dist/actions/scrape.js
CHANGED
|
@@ -1,7 +1,8 @@
|
|
|
1
|
+
import fs from 'fs';
|
|
1
2
|
import { successResponse } from '../protocol.js';
|
|
2
3
|
import { extractContentFromPage, waitForSPAContent } from './utils.js';
|
|
3
4
|
export async function handleScrape(command, browser) {
|
|
4
|
-
|
|
5
|
+
let page = browser.getPage();
|
|
5
6
|
if (!page) {
|
|
6
7
|
return {
|
|
7
8
|
id: command.id,
|
|
@@ -11,6 +12,28 @@ export async function handleScrape(command, browser) {
|
|
|
11
12
|
}
|
|
12
13
|
const timeout = (command.timeout ?? 15) * 1000;
|
|
13
14
|
try {
|
|
15
|
+
if (command.javaScriptEnabled === false) {
|
|
16
|
+
const browserInstance = browser.getBrowser();
|
|
17
|
+
if (browserInstance) {
|
|
18
|
+
page = await browserInstance.newPage({ javaScriptEnabled: false });
|
|
19
|
+
}
|
|
20
|
+
}
|
|
21
|
+
if (command.cookies && command.cookies.length > 0) {
|
|
22
|
+
await page.context().addCookies(command.cookies);
|
|
23
|
+
}
|
|
24
|
+
const currentUrl = page.url();
|
|
25
|
+
if (currentUrl !== 'about:blank' && currentUrl !== command.url) {
|
|
26
|
+
try {
|
|
27
|
+
const currentHost = new URL(currentUrl).hostname;
|
|
28
|
+
const targetHost = new URL(command.url).hostname;
|
|
29
|
+
if (currentHost !== targetHost) {
|
|
30
|
+
await page.goto('about:blank').catch(() => { });
|
|
31
|
+
}
|
|
32
|
+
}
|
|
33
|
+
catch {
|
|
34
|
+
await page.goto('about:blank').catch(() => { });
|
|
35
|
+
}
|
|
36
|
+
}
|
|
14
37
|
await page.goto(command.url, {
|
|
15
38
|
timeout,
|
|
16
39
|
waitUntil: 'domcontentloaded',
|
|
@@ -20,14 +43,67 @@ export async function handleScrape(command, browser) {
|
|
|
20
43
|
page.waitForTimeout(3000),
|
|
21
44
|
]);
|
|
22
45
|
await waitForSPAContent(page, 3000);
|
|
46
|
+
if (command.waitForSelector) {
|
|
47
|
+
await page
|
|
48
|
+
.locator(command.waitForSelector)
|
|
49
|
+
.first()
|
|
50
|
+
.waitFor({
|
|
51
|
+
state: 'visible',
|
|
52
|
+
timeout: Math.min(timeout, 10000),
|
|
53
|
+
});
|
|
54
|
+
}
|
|
23
55
|
const format = command.format ?? 'markdown';
|
|
24
56
|
const content = await extractContentFromPage(page, format, command.selector);
|
|
25
|
-
|
|
57
|
+
let metadata;
|
|
58
|
+
if (command.includeMetadata) {
|
|
59
|
+
metadata = await page.evaluate(() => {
|
|
60
|
+
const getMeta = (name) => document.querySelector(`meta[name="${name}"]`)?.getAttribute('content') ||
|
|
61
|
+
document.querySelector(`meta[property="${name}"]`)?.getAttribute('content') ||
|
|
62
|
+
'';
|
|
63
|
+
const getLink = (rel) => document.querySelector(`link[rel="${rel}"]`)?.getAttribute('href') ||
|
|
64
|
+
document.querySelector(`link[rel*="${rel}"]`)?.getAttribute('href') ||
|
|
65
|
+
'';
|
|
66
|
+
const favicon = getLink('icon') || getLink('shortcut icon') || '/favicon.ico';
|
|
67
|
+
const resolvedFavicon = favicon && !favicon.startsWith('http')
|
|
68
|
+
? new URL(favicon, window.location.href).href
|
|
69
|
+
: favicon;
|
|
70
|
+
return {
|
|
71
|
+
title: document.title || '',
|
|
72
|
+
description: getMeta('description'),
|
|
73
|
+
keywords: getMeta('keywords'),
|
|
74
|
+
author: getMeta('author'),
|
|
75
|
+
robots: getMeta('robots'),
|
|
76
|
+
canonical: document.querySelector('link[rel="canonical"]')?.getAttribute('href') || '',
|
|
77
|
+
favicon: resolvedFavicon,
|
|
78
|
+
lang: document.documentElement.lang || '',
|
|
79
|
+
ogTitle: getMeta('og:title'),
|
|
80
|
+
ogDescription: getMeta('og:description'),
|
|
81
|
+
ogImage: getMeta('og:image'),
|
|
82
|
+
ogUrl: getMeta('og:url'),
|
|
83
|
+
ogSiteName: getMeta('og:site_name'),
|
|
84
|
+
publishedTime: getMeta('article:published_time'),
|
|
85
|
+
modifiedTime: getMeta('article:modified_time'),
|
|
86
|
+
articleTag: getMeta('article:tag'),
|
|
87
|
+
articleSection: getMeta('article:section'),
|
|
88
|
+
};
|
|
89
|
+
});
|
|
90
|
+
}
|
|
91
|
+
const result = {
|
|
26
92
|
url: page.url(),
|
|
27
93
|
title: await page.title(),
|
|
28
94
|
content,
|
|
29
95
|
format,
|
|
30
|
-
|
|
96
|
+
...(metadata ? { metadata } : {}),
|
|
97
|
+
};
|
|
98
|
+
if (command.outputFile) {
|
|
99
|
+
const output = format === 'html' ? result.content : JSON.stringify(result, null, 2);
|
|
100
|
+
fs.writeFileSync(command.outputFile, output, 'utf-8');
|
|
101
|
+
return successResponse(command.id, {
|
|
102
|
+
...result,
|
|
103
|
+
savedTo: command.outputFile,
|
|
104
|
+
});
|
|
105
|
+
}
|
|
106
|
+
return successResponse(command.id, result);
|
|
31
107
|
}
|
|
32
108
|
catch (error) {
|
|
33
109
|
return {
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"scrape.js","sourceRoot":"","sources":["../../src/actions/scrape.ts"],"names":[],"mappings":"
|
|
1
|
+
{"version":3,"file":"scrape.js","sourceRoot":"","sources":["../../src/actions/scrape.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,MAAM,IAAI,CAAC;AAGpB,OAAO,EAAE,eAAe,EAAE,MAAM,gBAAgB,CAAC;AACjD,OAAO,EAAE,sBAAsB,EAAE,iBAAiB,EAAE,MAAM,YAAY,CAAC;AA6BvE,MAAM,CAAC,KAAK,UAAU,YAAY,CAChC,OAAsB,EACtB,OAAuB;IAEvB,IAAI,IAAI,GAAG,OAAO,CAAC,OAAO,EAAE,CAAC;IAC7B,IAAI,CAAC,IAAI,EAAE,CAAC;QACV,OAAO;YACL,EAAE,EAAE,OAAO,CAAC,EAAE;YACd,OAAO,EAAE,KAAK;YACd,KAAK,EAAE,8BAA8B;SACtC,CAAC;IACJ,CAAC;IAED,MAAM,OAAO,GAAG,CAAC,OAAO,CAAC,OAAO,IAAI,EAAE,CAAC,GAAG,IAAI,CAAC;IAE/C,IAAI,CAAC;QACH,IAAI,OAAO,CAAC,iBAAiB,KAAK,KAAK,EAAE,CAAC;YACxC,MAAM,eAAe,GAAG,OAAO,CAAC,UAAU,EAAE,CAAC;YAC7C,IAAI,eAAe,EAAE,CAAC;gBACpB,IAAI,GAAG,MAAM,eAAe,CAAC,OAAO,CAAC,EAAE,iBAAiB,EAAE,KAAK,EAAE,CAAC,CAAC;YACrE,CAAC;QACH,CAAC;QAED,IAAI,OAAO,CAAC,OAAO,IAAI,OAAO,CAAC,OAAO,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;YAClD,MAAM,IAAI,CAAC,OAAO,EAAE,CAAC,UAAU,CAAC,OAAO,CAAC,OAAO,CAAC,CAAC;QACnD,CAAC;QAED,MAAM,UAAU,GAAG,IAAI,CAAC,GAAG,EAAE,CAAC;QAC9B,IAAI,UAAU,KAAK,aAAa,IAAI,UAAU,KAAK,OAAO,CAAC,GAAG,EAAE,CAAC;YAC/D,IAAI,CAAC;gBACH,MAAM,WAAW,GAAG,IAAI,GAAG,CAAC,UAAU,CAAC,CAAC,QAAQ,CAAC;gBACjD,MAAM,UAAU,GAAG,IAAI,GAAG,CAAC,OAAO,CAAC,GAAG,CAAC,CAAC,QAAQ,CAAC;gBACjD,IAAI,WAAW,KAAK,UAAU,EAAE,CAAC;oBAC/B,MAAM,IAAI,CAAC,IAAI,CAAC,aAAa,CAAC,CAAC,KAAK,CAAC,GAAG,EAAE,GAAE,CAAC,CAAC,CAAC;gBACjD,CAAC;YACH,CAAC;YAAC,MAAM,CAAC;gBACP,MAAM,IAAI,CAAC,IAAI,CAAC,aAAa,CAAC,CAAC,KAAK,CAAC,GAAG,EAAE,GAAE,CAAC,CAAC,CAAC;YACjD,CAAC;QACH,CAAC;QAED,MAAM,IAAI,CAAC,IAAI,CAAC,OAAO,CAAC,GAAG,EAAE;YAC3B,OAAO;YACP,SAAS,EAAE,kBAAkB;SAC9B,CAAC,CAAC;QAEH,MAAM,OAAO,CAAC,IAAI,CAAC;YACjB,IAAI,CAAC,gBAAgB,CAAC,aAAa,EAAE,EAAE,OAAO,EAAE,IAAI,CAAC,GAAG,CAAC,OAAO,EAAE,IAAI,CAAC,EAAE,CAAC,CAAC,KAAK,CAAC,GAAG,EAAE,GAAE,CAAC,CAAC;YAC1F,IAAI,CAAC,cAAc,CAAC,IAAI,CAAC;SAC1B,CAAC,CAAC;QAEH,MAAM,iBAAiB,CAAC,IAAI,EAAE,IAAI,CAAC,CAAC;QAEpC,IAAI,OAAO,CAAC,eAAe,EAAE,CAAC;YAC5B,MAAM,IAAI;iBACP,OAAO,CAAC,OAAO,CAAC,eAAe,CAAC;iBAChC,KAAK,EAAE;iBACP,OAAO,CAAC;gBACP,KAAK,EAAE,SAAS;gBAChB,OAAO,EAAE,IAAI,CAAC,GAAG,CAAC,OAAO,EAAE,KAAK,CAAC;aAClC,CAAC,CAAC;QACP,CAAC;QAED,MAAM,MAAM,GAAG,OAAO,CAAC,MAAM,IAAI,UAAU,CAAC;QAC5C,MAAM,OAAO,GAAG,MAAM,sBAAsB,CAAC,IAAI,EAAE,MAAM,EAAE,OAAO,CAAC,QAAQ,CAAC,CAAC;QAE7E,IAAI,QAAoC,CAAC;QACzC,IAAI,OAAO,CAAC,eAAe,EAAE,CAAC;YAC5B,QAAQ,GAAG,MAAM,IAAI,CAAC,QAAQ,CAAC,GAAG,EAAE;gBAClC,MAAM,OAAO,GAAG,CAAC,IAAY,EAAE,EAAE,CAC/B,QAAQ,CAAC,aAAa,CAAC,cAAc,IAAI,IAAI,CAAC,EAAE,YAAY,CAAC,SAAS,CAAC;oBACvE,QAAQ,CAAC,aAAa,CAAC,kBAAkB,IAAI,IAAI,CAAC,EAAE,YAAY,CAAC,SAAS,CAAC;oBAC3E,EAAE,CAAC;gBAEL,MAAM,OAAO,GAAG,CAAC,GAAW,EAAE,EAAE,CAC9B,QAAQ,CAAC,aAAa,CAAC,aAAa,GAAG,IAAI,CAAC,EAAE,YAAY,CAAC,MAAM,CAAC;oBAClE,QAAQ,CAAC,aAAa,CAAC,cAAc,GAAG,IAAI,CAAC,EAAE,YAAY,CAAC,MAAM,CAAC;oBACnE,EAAE,CAAC;gBAEL,MAAM,OAAO,GAAG,OAAO,CAAC,MAAM,CAAC,IAAI,OAAO,CAAC,eAAe,CAAC,IAAI,cAAc,CAAC;gBAC9E,MAAM,eAAe,GACnB,OAAO,IAAI,CAAC,OAAO,CAAC,UAAU,CAAC,MAAM,CAAC;oBACpC,CAAC,CAAC,IAAI,GAAG,CAAC,OAAO,EAAE,MAAM,CAAC,QAAQ,CAAC,IAAI,CAAC,CAAC,IAAI;oBAC7C,CAAC,CAAC,OAAO,CAAC;gBAEd,OAAO;oBACL,KAAK,EAAE,QAAQ,CAAC,KAAK,IAAI,EAAE;oBAC3B,WAAW,EAAE,OAAO,CAAC,aAAa,CAAC;oBACnC,QAAQ,EAAE,OAAO,CAAC,UAAU,CAAC;oBAC7B,MAAM,EAAE,OAAO,CAAC,QAAQ,CAAC;oBACzB,MAAM,EAAE,OAAO,CAAC,QAAQ,CAAC;oBACzB,SAAS,EAAE,QAAQ,CAAC,aAAa,CAAC,uBAAuB,CAAC,EAAE,YAAY,CAAC,MAAM,CAAC,IAAI,EAAE;oBACtF,OAAO,EAAE,eAAe;oBACxB,IAAI,EAAE,QAAQ,CAAC,eAAe,CAAC,IAAI,IAAI,EAAE;oBACzC,OAAO,EAAE,OAAO,CAAC,UAAU,CAAC;oBAC5B,aAAa,EAAE,OAAO,CAAC,gBAAgB,CAAC;oBACxC,OAAO,EAAE,OAAO,CAAC,UAAU,CAAC;oBAC5B,KAAK,EAAE,OAAO,CAAC,QAAQ,CAAC;oBACxB,UAAU,EAAE,OAAO,CAAC,cAAc,CAAC;oBACnC,aAAa,EAAE,OAAO,CAAC,wBAAwB,CAAC;oBAChD,YAAY,EAAE,OAAO,CAAC,uBAAuB,CAAC;oBAC9C,UAAU,EAAE,OAAO,CAAC,aAAa,CAAC;oBAClC,cAAc,EAAE,OAAO,CAAC,iBAAiB,CAAC;iBAC3C,CAAC;YACJ,CAAC,CAAC,CAAC;QACL,CAAC;QAED,MAAM,MAAM,GAAiB;YAC3B,GAAG,EAAE,IAAI,CAAC,GAAG,EAAE;YACf,KAAK,EAAE,MAAM,IAAI,CAAC,KAAK,EAAE;YACzB,OAAO;YACP,MAAM;YACN,GAAG,CAAC,QAAQ,CAAC,CAAC,CAAC,EAAE,QAAQ,EAAE,CAAC,CAAC,CAAC,EAAE,CAAC;SAClC,CAAC;QAEF,IAAI,OAAO,CAAC,UAAU,EAAE,CAAC;YACvB,MAAM,MAAM,GAAG,MAAM,KAAK,MAAM,CAAC,CAAC,CAAC,MAAM,CAAC,OAAO,CAAC,CAAC,CAAC,IAAI,CAAC,SAAS,CAAC,MAAM,EAAE,IAAI,EAAE,CAAC,CAAC,CAAC;YACpF,EAAE,CAAC,aAAa,CAAC,OAAO,CAAC,UAAU,EAAE,MAAM,EAAE,OAAO,CAAC,CAAC;YACtD,OAAO,eAAe,CAAC,OAAO,CAAC,EAAE,EAAE;gBACjC,GAAG,MAAM;gBACT,OAAO,EAAE,OAAO,CAAC,UAAU;aAC5B,CAAC,CAAC;QACL,CAAC;QAED,OAAO,eAAe,CAAC,OAAO,CAAC,EAAE,EAAE,MAAM,CAAC,CAAC;IAC7C,CAAC;IAAC,OAAO,KAAK,EAAE,CAAC;QACf,OAAO;YACL,EAAE,EAAE,OAAO,CAAC,EAAE;YACd,OAAO,EAAE,KAAK;YACd,KAAK,EAAE,KAAK,YAAY,KAAK,CAAC,CAAC,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM,CAAC,KAAK,CAAC;SAC9D,CAAC;IACJ,CAAC;AACH,CAAC"}
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"search.d.ts","sourceRoot":"","sources":["../../src/actions/search.ts"],"names":[],"mappings":"
|
|
1
|
+
{"version":3,"file":"search.d.ts","sourceRoot":"","sources":["../../src/actions/search.ts"],"names":[],"mappings":"AAEA,OAAO,KAAK,EAAE,cAAc,EAAE,MAAM,qBAAqB,CAAC;AAC1D,OAAO,KAAK,EAAE,aAAa,EAAE,QAAQ,EAAE,cAAc,EAAgB,MAAM,aAAa,CAAC;AAmIzF,wBAAsB,YAAY,CAChC,OAAO,EAAE,aAAa,EACtB,OAAO,EAAE,cAAc,GACtB,OAAO,CAAC,QAAQ,CAAC,cAAc,CAAC,CAAC,CAgFnC"}
|
package/dist/actions/search.js
CHANGED
|
@@ -1,15 +1,49 @@
|
|
|
1
|
+
import fs from 'fs';
|
|
1
2
|
import { successResponse } from '../protocol.js';
|
|
2
|
-
const
|
|
3
|
-
|
|
4
|
-
|
|
5
|
-
|
|
6
|
-
};
|
|
3
|
+
const STEALTH_USER_AGENT = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/135.0.0.0 Safari/537.36';
|
|
4
|
+
async function applyStealth(page) {
|
|
5
|
+
await page.addInitScript(() => {
|
|
6
|
+
Object.defineProperty(navigator, 'webdriver', { get: () => undefined });
|
|
7
|
+
});
|
|
8
|
+
await page.addInitScript(() => {
|
|
9
|
+
window.chrome = { runtime: {} };
|
|
10
|
+
});
|
|
11
|
+
await page.addInitScript(() => {
|
|
12
|
+
const originalQuery = window.navigator.permissions.query;
|
|
13
|
+
window.navigator.permissions.query = (parameters) => parameters.name === 'notifications'
|
|
14
|
+
? Promise.resolve({ state: Notification.permission })
|
|
15
|
+
: originalQuery(parameters);
|
|
16
|
+
});
|
|
17
|
+
await page.addInitScript(() => {
|
|
18
|
+
Object.defineProperty(navigator, 'plugins', {
|
|
19
|
+
get: () => [1, 2, 3, 4, 5],
|
|
20
|
+
});
|
|
21
|
+
});
|
|
22
|
+
await page.addInitScript(() => {
|
|
23
|
+
Object.defineProperty(navigator, 'languages', {
|
|
24
|
+
get: () => ['en-US', 'en'],
|
|
25
|
+
});
|
|
26
|
+
});
|
|
27
|
+
}
|
|
28
|
+
function buildSearchUrl(engine, query) {
|
|
29
|
+
const encoded = encodeURIComponent(query);
|
|
30
|
+
switch (engine) {
|
|
31
|
+
case 'google':
|
|
32
|
+
return `https://www.google.com/search?q=${encoded}&udm=14`;
|
|
33
|
+
case 'bing':
|
|
34
|
+
return `https://www.bing.com/search?q=${encoded}`;
|
|
35
|
+
case 'duckduckgo':
|
|
36
|
+
return `https://html.duckduckgo.com/html/?q=${encoded}`;
|
|
37
|
+
default:
|
|
38
|
+
return `https://www.google.com/search?q=${encoded}&udm=14`;
|
|
39
|
+
}
|
|
40
|
+
}
|
|
7
41
|
const SELECTORS = {
|
|
8
42
|
google: {
|
|
9
43
|
result: 'div.g, div[data-hveid]',
|
|
10
44
|
title: 'h3',
|
|
11
45
|
link: 'a[href]',
|
|
12
|
-
snippet: 'div[data-sncf-ied="cf"] span, div.VwiC3b, div.st',
|
|
46
|
+
snippet: 'div[data-sncf-ied="cf"] span, div.VwiC3b, div.st, span[style]',
|
|
13
47
|
},
|
|
14
48
|
bing: {
|
|
15
49
|
result: 'li.b_algo',
|
|
@@ -18,10 +52,10 @@ const SELECTORS = {
|
|
|
18
52
|
snippet: 'p, div.b_caption p',
|
|
19
53
|
},
|
|
20
54
|
duckduckgo: {
|
|
21
|
-
result: '
|
|
22
|
-
title: '
|
|
23
|
-
link: 'a
|
|
24
|
-
snippet: 'a.result__snippet,
|
|
55
|
+
result: 'div.result',
|
|
56
|
+
title: 'a.result__a',
|
|
57
|
+
link: 'a.result__a',
|
|
58
|
+
snippet: 'a.result__snippet, td.result__snippet',
|
|
25
59
|
},
|
|
26
60
|
};
|
|
27
61
|
async function parseGoogleResults(page, limit) {
|
|
@@ -63,9 +97,9 @@ async function parseDuckDuckGoResults(page, limit) {
|
|
|
63
97
|
const elements = await page.locator(SELECTORS.duckduckgo.result).all();
|
|
64
98
|
for (const el of elements.slice(0, limit)) {
|
|
65
99
|
try {
|
|
66
|
-
const
|
|
67
|
-
const
|
|
68
|
-
const url = await
|
|
100
|
+
const titleEl = el.locator(SELECTORS.duckduckgo.title);
|
|
101
|
+
const title = await titleEl.textContent();
|
|
102
|
+
const url = await titleEl.getAttribute('href');
|
|
69
103
|
const snippet = await el.locator(SELECTORS.duckduckgo.snippet).textContent();
|
|
70
104
|
if (title && url && url.startsWith('http')) {
|
|
71
105
|
results.push({ title: title.trim(), url, snippet: snippet?.trim() });
|
|
@@ -76,6 +110,8 @@ async function parseDuckDuckGoResults(page, limit) {
|
|
|
76
110
|
return results;
|
|
77
111
|
}
|
|
78
112
|
export async function handleSearch(command, browser) {
|
|
113
|
+
const useStealth = command.stealth !== false;
|
|
114
|
+
const engine = command.engine ?? 'google';
|
|
79
115
|
if (!browser.isLaunched()) {
|
|
80
116
|
await browser.launch({
|
|
81
117
|
id: 'auto',
|
|
@@ -84,19 +120,36 @@ export async function handleSearch(command, browser) {
|
|
|
84
120
|
});
|
|
85
121
|
}
|
|
86
122
|
const page = browser.getPage();
|
|
87
|
-
const engine = command.engine ?? 'google';
|
|
88
123
|
const limit = command.limit ?? 10;
|
|
89
124
|
const timeout = (command.timeout ?? 15) * 1000;
|
|
90
125
|
try {
|
|
91
|
-
|
|
126
|
+
if (useStealth && engine !== 'bing') {
|
|
127
|
+
await applyStealth(page);
|
|
128
|
+
try {
|
|
129
|
+
await page.context().setExtraHTTPHeaders({
|
|
130
|
+
'User-Agent': STEALTH_USER_AGENT,
|
|
131
|
+
});
|
|
132
|
+
}
|
|
133
|
+
catch { }
|
|
134
|
+
try {
|
|
135
|
+
await page.setViewportSize({ width: 1920, height: 1080 });
|
|
136
|
+
}
|
|
137
|
+
catch { }
|
|
138
|
+
}
|
|
139
|
+
const searchUrl = buildSearchUrl(engine, command.query);
|
|
92
140
|
await page.goto(searchUrl, {
|
|
93
141
|
timeout,
|
|
94
142
|
waitUntil: 'domcontentloaded',
|
|
95
143
|
});
|
|
96
|
-
|
|
97
|
-
page.waitForLoadState('
|
|
98
|
-
|
|
99
|
-
|
|
144
|
+
if (engine === 'duckduckgo') {
|
|
145
|
+
await page.waitForLoadState('domcontentloaded', { timeout });
|
|
146
|
+
}
|
|
147
|
+
else {
|
|
148
|
+
await Promise.race([
|
|
149
|
+
page.waitForLoadState('networkidle', { timeout: Math.min(timeout, 5000) }).catch(() => { }),
|
|
150
|
+
page.waitForTimeout(3000),
|
|
151
|
+
]);
|
|
152
|
+
}
|
|
100
153
|
let results;
|
|
101
154
|
switch (engine) {
|
|
102
155
|
case 'google':
|
|
@@ -111,12 +164,16 @@ export async function handleSearch(command, browser) {
|
|
|
111
164
|
default:
|
|
112
165
|
results = [];
|
|
113
166
|
}
|
|
114
|
-
|
|
167
|
+
const response = successResponse(command.id, {
|
|
115
168
|
query: command.query,
|
|
116
169
|
engine,
|
|
117
170
|
results,
|
|
118
171
|
total: results.length,
|
|
119
172
|
});
|
|
173
|
+
if (command.outputFile && response.success) {
|
|
174
|
+
fs.writeFileSync(command.outputFile, JSON.stringify(response.data, null, 2), 'utf-8');
|
|
175
|
+
}
|
|
176
|
+
return response;
|
|
120
177
|
}
|
|
121
178
|
catch (error) {
|
|
122
179
|
return {
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"search.js","sourceRoot":"","sources":["../../src/actions/search.ts"],"names":[],"mappings":"
|
|
1
|
+
{"version":3,"file":"search.js","sourceRoot":"","sources":["../../src/actions/search.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,MAAM,IAAI,CAAC;AAIpB,OAAO,EAAE,eAAe,EAAE,MAAM,gBAAgB,CAAC;AAEjD,MAAM,kBAAkB,GACtB,uHAAuH,CAAC;AAE1H,KAAK,UAAU,YAAY,CAAC,IAAU;IACpC,MAAM,IAAI,CAAC,aAAa,CAAC,GAAG,EAAE;QAC5B,MAAM,CAAC,cAAc,CAAC,SAAS,EAAE,WAAW,EAAE,EAAE,GAAG,EAAE,GAAG,EAAE,CAAC,SAAS,EAAE,CAAC,CAAC;IAC1E,CAAC,CAAC,CAAC;IAEH,MAAM,IAAI,CAAC,aAAa,CAAC,GAAG,EAAE;QAC3B,MAAc,CAAC,MAAM,GAAG,EAAE,OAAO,EAAE,EAAE,EAAE,CAAC;IAC3C,CAAC,CAAC,CAAC;IAEH,MAAM,IAAI,CAAC,aAAa,CAAC,GAAG,EAAE;QAC5B,MAAM,aAAa,GAAG,MAAM,CAAC,SAAS,CAAC,WAAW,CAAC,KAAK,CAAC;QACzD,MAAM,CAAC,SAAS,CAAC,WAAW,CAAC,KAAK,GAAG,CAAC,UAAe,EAAE,EAAE,CACvD,UAAU,CAAC,IAAI,KAAK,eAAe;YACjC,CAAC,CAAC,OAAO,CAAC,OAAO,CAAC,EAAE,KAAK,EAAE,YAAY,CAAC,UAAU,EAAsB,CAAC;YACzE,CAAC,CAAC,aAAa,CAAC,UAAU,CAAC,CAAC;IAClC,CAAC,CAAC,CAAC;IAEH,MAAM,IAAI,CAAC,aAAa,CAAC,GAAG,EAAE;QAC5B,MAAM,CAAC,cAAc,CAAC,SAAS,EAAE,SAAS,EAAE;YAC1C,GAAG,EAAE,GAAG,EAAE,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,CAAC,EAAE,CAAC,EAAE,CAAC,CAAC;SAC3B,CAAC,CAAC;IACL,CAAC,CAAC,CAAC;IAEH,MAAM,IAAI,CAAC,aAAa,CAAC,GAAG,EAAE;QAC5B,MAAM,CAAC,cAAc,CAAC,SAAS,EAAE,WAAW,EAAE;YAC5C,GAAG,EAAE,GAAG,EAAE,CAAC,CAAC,OAAO,EAAE,IAAI,CAAC;SAC3B,CAAC,CAAC;IACL,CAAC,CAAC,CAAC;AACL,CAAC;AAED,SAAS,cAAc,CAAC,MAAc,EAAE,KAAa;IACnD,MAAM,OAAO,GAAG,kBAAkB,CAAC,KAAK,CAAC,CAAC;IAC1C,QAAQ,MAAM,EAAE,CAAC;QACf,KAAK,QAAQ;YACX,OAAO,mCAAmC,OAAO,SAAS,CAAC;QAC7D,KAAK,MAAM;YACT,OAAO,iCAAiC,OAAO,EAAE,CAAC;QACpD,KAAK,YAAY;YACf,OAAO,uCAAuC,OAAO,EAAE,CAAC;QAC1D;YACE,OAAO,mCAAmC,OAAO,SAAS,CAAC;IAC/D,CAAC;AACH,CAAC;AAED,MAAM,SAAS,GAAG;IAChB,MAAM,EAAE;QACN,MAAM,EAAE,wBAAwB;QAChC,KAAK,EAAE,IAAI;QACX,IAAI,EAAE,SAAS;QACf,OAAO,EAAE,+DAA+D;KACzE;IACD,IAAI,EAAE;QACJ,MAAM,EAAE,WAAW;QACnB,KAAK,EAAE,IAAI;QACX,IAAI,EAAE,SAAS;QACf,OAAO,EAAE,oBAAoB;KAC9B;IACD,UAAU,EAAE;QACV,MAAM,EAAE,YAAY;QACpB,KAAK,EAAE,aAAa;QACpB,IAAI,EAAE,aAAa;QACnB,OAAO,EAAE,uCAAuC;KACjD;CACF,CAAC;AAEF,KAAK,UAAU,kBAAkB,CAAC,IAAU,EAAE,KAAa;IACzD,MAAM,OAAO,GAAmB,EAAE,CAAC;IACnC,MAAM,QAAQ,GAAG,MAAM,IAAI,CAAC,OAAO,CAAC,SAAS,CAAC,MAAM,CAAC,MAAM,CAAC,CAAC,GAAG,EAAE,CAAC;IAEnE,KAAK,MAAM,EAAE,IAAI,QAAQ,CAAC,KAAK,CAAC,CAAC,EAAE,KAAK,CAAC,EAAE,CAAC;QAC1C,IAAI,CAAC;YACH,MAAM,KAAK,GAAG,MAAM,EAAE,CAAC,OAAO,CAAC,SAAS,CAAC,MAAM,CAAC,KAAK,CAAC,CAAC,WAAW,EAAE,CAAC;YACrE,MAAM,MAAM,GAAG,EAAE,CAAC,OAAO,CAAC,SAAS,CAAC,MAAM,CAAC,IAAI,CAAC,CAAC,KAAK,EAAE,CAAC;YACzD,MAAM,GAAG,GAAG,MAAM,MAAM,CAAC,YAAY,CAAC,MAAM,CAAC,CAAC;YAC9C,MAAM,OAAO,GAAG,MAAM,EAAE,CAAC,OAAO,CAAC,SAAS,CAAC,MAAM,CAAC,OAAO,CAAC,CAAC,WAAW,EAAE,CAAC;YAEzE,IAAI,KAAK,IAAI,GAAG,IAAI,GAAG,CAAC,UAAU,CAAC,MAAM,CAAC,EAAE,CAAC;gBAC3C,OAAO,CAAC,IAAI,CAAC,EAAE,KAAK,EAAE,KAAK,CAAC,IAAI,EAAE,EAAE,GAAG,EAAE,OAAO,EAAE,OAAO,EAAE,IAAI,EAAE,EAAE,CAAC,CAAC;YACvE,CAAC;QACH,CAAC;QAAC,MAAM,CAAC,CAAA,CAAC;IACZ,CAAC;IAED,OAAO,OAAO,CAAC;AACjB,CAAC;AAED,KAAK,UAAU,gBAAgB,CAAC,IAAU,EAAE,KAAa;IACvD,MAAM,OAAO,GAAmB,EAAE,CAAC;IACnC,MAAM,QAAQ,GAAG,MAAM,IAAI,CAAC,OAAO,CAAC,SAAS,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC,GAAG,EAAE,CAAC;IAEjE,KAAK,MAAM,EAAE,IAAI,QAAQ,CAAC,KAAK,CAAC,CAAC,EAAE,KAAK,CAAC,EAAE,CAAC;QAC1C,IAAI,CAAC;YACH,MAAM,KAAK,GAAG,MAAM,EAAE,CAAC,OAAO,CAAC,SAAS,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC,WAAW,EAAE,CAAC;YACnE,MAAM,MAAM,GAAG,EAAE,CAAC,OAAO,CAAC,SAAS,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC,KAAK,EAAE,CAAC;YACvD,MAAM,GAAG,GAAG,MAAM,MAAM,CAAC,YAAY,CAAC,MAAM,CAAC,CAAC;YAC9C,MAAM,OAAO,GAAG,MAAM,EAAE,CAAC,OAAO,CAAC,SAAS,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC,WAAW,EAAE,CAAC;YAEvE,IAAI,KAAK,IAAI,GAAG,EAAE,CAAC;gBACjB,OAAO,CAAC,IAAI,CAAC,EAAE,KAAK,EAAE,KAAK,CAAC,IAAI,EAAE,EAAE,GAAG,EAAE,OAAO,EAAE,OAAO,EAAE,IAAI,EAAE,EAAE,CAAC,CAAC;YACvE,CAAC;QACH,CAAC;QAAC,MAAM,CAAC,CAAA,CAAC;IACZ,CAAC;IAED,OAAO,OAAO,CAAC;AACjB,CAAC;AAED,KAAK,UAAU,sBAAsB,CAAC,IAAU,EAAE,KAAa;IAC7D,MAAM,OAAO,GAAmB,EAAE,CAAC;IACnC,MAAM,QAAQ,GAAG,MAAM,IAAI,CAAC,OAAO,CAAC,SAAS,CAAC,UAAU,CAAC,MAAM,CAAC,CAAC,GAAG,EAAE,CAAC;IAEvE,KAAK,MAAM,EAAE,IAAI,QAAQ,CAAC,KAAK,CAAC,CAAC,EAAE,KAAK,CAAC,EAAE,CAAC;QAC1C,IAAI,CAAC;YACH,MAAM,OAAO,GAAG,EAAE,CAAC,OAAO,CAAC,SAAS,CAAC,UAAU,CAAC,KAAK,CAAC,CAAC;YACvD,MAAM,KAAK,GAAG,MAAM,OAAO,CAAC,WAAW,EAAE,CAAC;YAC1C,MAAM,GAAG,GAAG,MAAM,OAAO,CAAC,YAAY,CAAC,MAAM,CAAC,CAAC;YAC/C,MAAM,OAAO,GAAG,MAAM,EAAE,CAAC,OAAO,CAAC,SAAS,CAAC,UAAU,CAAC,OAAO,CAAC,CAAC,WAAW,EAAE,CAAC;YAE7E,IAAI,KAAK,IAAI,GAAG,IAAI,GAAG,CAAC,UAAU,CAAC,MAAM,CAAC,EAAE,CAAC;gBAC3C,OAAO,CAAC,IAAI,CAAC,EAAE,KAAK,EAAE,KAAK,CAAC,IAAI,EAAE,EAAE,GAAG,EAAE,OAAO,EAAE,OAAO,EAAE,IAAI,EAAE,EAAE,CAAC,CAAC;YACvE,CAAC;QACH,CAAC;QAAC,MAAM,CAAC,CAAA,CAAC;IACZ,CAAC;IAED,OAAO,OAAO,CAAC;AACjB,CAAC;AAED,MAAM,CAAC,KAAK,UAAU,YAAY,CAChC,OAAsB,EACtB,OAAuB;IAEvB,MAAM,UAAU,GAAG,OAAO,CAAC,OAAO,KAAK,KAAK,CAAC;IAC7C,MAAM,MAAM,GAAG,OAAO,CAAC,MAAM,IAAI,QAAQ,CAAC;IAE1C,IAAI,CAAC,OAAO,CAAC,UAAU,EAAE,EAAE,CAAC;QAC1B,MAAM,OAAO,CAAC,MAAM,CAAC;YACnB,EAAE,EAAE,MAAM;YACV,MAAM,EAAE,QAAQ;YAChB,QAAQ,EAAE,OAAO,CAAC,QAAQ,IAAI,IAAI;SACnC,CAAC,CAAC;IACL,CAAC;IAED,MAAM,IAAI,GAAG,OAAO,CAAC,OAAO,EAAE,CAAC;IAC/B,MAAM,KAAK,GAAG,OAAO,CAAC,KAAK,IAAI,EAAE,CAAC;IAClC,MAAM,OAAO,GAAG,CAAC,OAAO,CAAC,OAAO,IAAI,EAAE,CAAC,GAAG,IAAI,CAAC;IAE/C,IAAI,CAAC;QACH,IAAI,UAAU,IAAI,MAAM,KAAK,MAAM,EAAE,CAAC;YACpC,MAAM,YAAY,CAAC,IAAI,CAAC,CAAC;YACzB,IAAI,CAAC;gBACH,MAAM,IAAI,CAAC,OAAO,EAAE,CAAC,mBAAmB,CAAC;oBACvC,YAAY,EAAE,kBAAkB;iBACjC,CAAC,CAAC;YACL,CAAC;YAAC,MAAM,CAAC,CAAA,CAAC;YACV,IAAI,CAAC;gBACH,MAAM,IAAI,CAAC,eAAe,CAAC,EAAE,KAAK,EAAE,IAAI,EAAE,MAAM,EAAE,IAAI,EAAE,CAAC,CAAC;YAC5D,CAAC;YAAC,MAAM,CAAC,CAAA,CAAC;QACZ,CAAC;QAED,MAAM,SAAS,GAAG,cAAc,CAAC,MAAM,EAAE,OAAO,CAAC,KAAK,CAAC,CAAC;QAExD,MAAM,IAAI,CAAC,IAAI,CAAC,SAAS,EAAE;YACzB,OAAO;YACP,SAAS,EAAE,kBAAkB;SAC9B,CAAC,CAAC;QAEH,IAAI,MAAM,KAAK,YAAY,EAAE,CAAC;YAC5B,MAAM,IAAI,CAAC,gBAAgB,CAAC,kBAAkB,EAAE,EAAE,OAAO,EAAE,CAAC,CAAC;QAC/D,CAAC;aAAM,CAAC;YACN,MAAM,OAAO,CAAC,IAAI,CAAC;gBACjB,IAAI,CAAC,gBAAgB,CAAC,aAAa,EAAE,EAAE,OAAO,EAAE,IAAI,CAAC,GAAG,CAAC,OAAO,EAAE,IAAI,CAAC,EAAE,CAAC,CAAC,KAAK,CAAC,GAAG,EAAE,GAAE,CAAC,CAAC;gBAC1F,IAAI,CAAC,cAAc,CAAC,IAAI,CAAC;aAC1B,CAAC,CAAC;QACL,CAAC;QAED,IAAI,OAAuB,CAAC;QAE5B,QAAQ,MAAM,EAAE,CAAC;YACf,KAAK,QAAQ;gBACX,OAAO,GAAG,MAAM,kBAAkB,CAAC,IAAI,EAAE,KAAK,CAAC,CAAC;gBAChD,MAAM;YACR,KAAK,MAAM;gBACT,OAAO,GAAG,MAAM,gBAAgB,CAAC,IAAI,EAAE,KAAK,CAAC,CAAC;gBAC9C,MAAM;YACR,KAAK,YAAY;gBACf,OAAO,GAAG,MAAM,sBAAsB,CAAC,IAAI,EAAE,KAAK,CAAC,CAAC;gBACpD,MAAM;YACR;gBACE,OAAO,GAAG,EAAE,CAAC;QACjB,CAAC;QAED,MAAM,QAAQ,GAA6B,eAAe,CAAC,OAAO,CAAC,EAAE,EAAE;YACrE,KAAK,EAAE,OAAO,CAAC,KAAK;YACpB,MAAM;YACN,OAAO;YACP,KAAK,EAAE,OAAO,CAAC,MAAM;SACtB,CAAC,CAAC;QAEH,IAAI,OAAO,CAAC,UAAU,IAAI,QAAQ,CAAC,OAAO,EAAE,CAAC;YAC3C,EAAE,CAAC,aAAa,CAAC,OAAO,CAAC,UAAU,EAAE,IAAI,CAAC,SAAS,CAAC,QAAQ,CAAC,IAAI,EAAE,IAAI,EAAE,CAAC,CAAC,EAAE,OAAO,CAAC,CAAC;QACxF,CAAC;QAED,OAAO,QAAQ,CAAC;IAClB,CAAC;IAAC,OAAO,KAAK,EAAE,CAAC;QACf,OAAO;YACL,EAAE,EAAE,OAAO,CAAC,EAAE;YACd,OAAO,EAAE,KAAK;YACd,KAAK,EAAE,KAAK,YAAY,KAAK,CAAC,CAAC,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM,CAAC,KAAK,CAAC;SAC9D,CAAC;IACJ,CAAC;AACH,CAAC"}
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"utils.d.ts","sourceRoot":"","sources":["../../src/actions/utils.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,IAAI,EAAE,OAAO,EAAE,MAAM,iBAAiB,CAAC;AAErD,MAAM,WAAW,YAAY;IAC3B,QAAQ,EAAE,MAAM,CAAC;IACjB,IAAI,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE;QAAE,IAAI,EAAE,MAAM,CAAC;QAAC,IAAI,CAAC,EAAE,MAAM,CAAA;KAAE,CAAC,CAAC;CACxD;AAED,eAAO,MAAM,iBAAiB,UAkH7B,CAAC;AA6DF,eAAO,MAAM,uBAAuB,UAKnC,CAAC;AAEF,wBAAsB,sBAAsB,CAC1C,IAAI,EAAE,IAAI,EACV,MAAM,EAAE,MAAM,GAAG,MAAM,GAAG,UAAU,EACpC,QAAQ,CAAC,EAAE,MAAM,GAChB,OAAO,CAAC,MAAM,CAAC,CAkFjB;
|
|
1
|
+
{"version":3,"file":"utils.d.ts","sourceRoot":"","sources":["../../src/actions/utils.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,IAAI,EAAE,OAAO,EAAE,MAAM,iBAAiB,CAAC;AAErD,MAAM,WAAW,YAAY;IAC3B,QAAQ,EAAE,MAAM,CAAC;IACjB,IAAI,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE;QAAE,IAAI,EAAE,MAAM,CAAC;QAAC,IAAI,CAAC,EAAE,MAAM,CAAA;KAAE,CAAC,CAAC;CACxD;AAED,eAAO,MAAM,iBAAiB,UAkH7B,CAAC;AA6DF,eAAO,MAAM,uBAAuB,UAKnC,CAAC;AAEF,wBAAsB,sBAAsB,CAC1C,IAAI,EAAE,IAAI,EACV,MAAM,EAAE,MAAM,GAAG,MAAM,GAAG,UAAU,EACpC,QAAQ,CAAC,EAAE,MAAM,GAChB,OAAO,CAAC,MAAM,CAAC,CAkFjB;AA2FD,wBAAsB,mBAAmB,CACvC,OAAO,EAAE,OAAO,EAChB,QAAQ,EAAE,MAAM,EAChB,KAAK,EAAE,OAAO,GACb,OAAO,CAAC,IAAI,CAAC,CAaf;AAED,wBAAgB,iBAAiB,CAAC,KAAK,EAAE,OAAO,EAAE,QAAQ,EAAE,MAAM,GAAG,KAAK,CAkEzE;AAaD,wBAAsB,iBAAiB,CAAC,IAAI,EAAE,IAAI,EAAE,SAAS,EAAE,MAAM,GAAG,OAAO,CAAC,IAAI,CAAC,CAmBpF;AAwED,wBAAgB,cAAc,CAAC,IAAI,EAAE,MAAM,GAAG,MAAM,CAYnD"}
|