webpeel 0.21.89 → 0.22.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/core/cross-verify.d.ts +27 -0
- package/dist/core/cross-verify.js +93 -0
- package/dist/core/google-serp-parser.d.ts +82 -0
- package/dist/core/google-serp-parser.js +287 -0
- package/dist/core/search-engines.d.ts +25 -0
- package/dist/core/search-engines.js +182 -0
- package/dist/core/search-provider.d.ts +5 -1
- package/dist/core/search-provider.js +15 -2
- package/dist/core/vertical-search.d.ts +53 -0
- package/dist/core/vertical-search.js +231 -0
- package/dist/index.d.ts +5 -0
- package/dist/index.js +4 -0
- package/dist/server/app.js +1 -1
- package/dist/server/routes/search.js +199 -3
- package/dist/server/routes/smart-search/handlers/cars.d.ts +2 -0
- package/dist/server/routes/smart-search/handlers/cars.js +99 -0
- package/dist/server/routes/smart-search/handlers/flights.d.ts +2 -0
- package/dist/server/routes/smart-search/handlers/flights.js +69 -0
- package/dist/server/routes/smart-search/handlers/general.d.ts +2 -0
- package/dist/server/routes/smart-search/handlers/general.js +390 -0
- package/dist/server/routes/smart-search/handlers/hotels.d.ts +2 -0
- package/dist/server/routes/smart-search/handlers/hotels.js +85 -0
- package/dist/server/routes/smart-search/handlers/products.d.ts +2 -0
- package/dist/server/routes/smart-search/handlers/products.js +213 -0
- package/dist/server/routes/smart-search/handlers/rental.d.ts +2 -0
- package/dist/server/routes/smart-search/handlers/rental.js +151 -0
- package/dist/server/routes/smart-search/handlers/restaurants.d.ts +2 -0
- package/dist/server/routes/smart-search/handlers/restaurants.js +205 -0
- package/dist/server/routes/smart-search/index.d.ts +19 -0
- package/dist/server/routes/smart-search/index.js +508 -0
- package/dist/server/routes/smart-search/intent.d.ts +3 -0
- package/dist/server/routes/smart-search/intent.js +109 -0
- package/dist/server/routes/smart-search/llm.d.ts +8 -0
- package/dist/server/routes/smart-search/llm.js +101 -0
- package/dist/server/routes/smart-search/sources/reddit.d.ts +18 -0
- package/dist/server/routes/smart-search/sources/reddit.js +34 -0
- package/dist/server/routes/smart-search/sources/yelp.d.ts +25 -0
- package/dist/server/routes/smart-search/sources/yelp.js +171 -0
- package/dist/server/routes/smart-search/sources/youtube.d.ts +8 -0
- package/dist/server/routes/smart-search/sources/youtube.js +9 -0
- package/dist/server/routes/smart-search/types.d.ts +30 -0
- package/dist/server/routes/smart-search/types.js +1 -0
- package/dist/server/routes/smart-search/utils.d.ts +12 -0
- package/dist/server/routes/smart-search/utils.js +97 -0
- package/package.json +1 -1
|
@@ -0,0 +1,182 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Additional search engine providers: Baidu, Naver, Yahoo Japan, Yandex
|
|
3
|
+
* Baidu, Yahoo Japan and Yandex are scraped over plain HTTP with cheerio; Naver is fetched through peel() with rendering enabled. No API key required.
|
|
4
|
+
*/
|
|
5
|
+
import { load } from 'cheerio';
|
|
6
|
+
import { simpleFetch } from './fetcher.js';
|
|
7
|
+
// ── Baidu Search ──────────────────────────────────────────────────────────
|
|
8
|
+
/**
 * Baidu web search provider.
 *
 * Requests the Baidu results page through `simpleFetch` and extracts results
 * with cheerio. Best-effort: any error while fetching or parsing yields `[]`.
 */
export class BaiduSearchProvider {
    id = 'baidu';
    requiresApiKey = false;
    /**
     * Search Baidu for `query`.
     *
     * @param {string} query - Search terms, sent as the `wd` parameter.
     * @param {{ count?: number } | null} [options] - `count` is the maximum
     *   number of results returned (default 10). The `rn` value sent to Baidu
     *   is capped at 50.
     * @returns {Promise<Array<{ title: string, url: string, snippet: string }>>}
     *   Results de-duplicated by URL, or `[]` on any failure.
     */
    async searchWeb(query, options) {
        // Tolerate a missing/null options bag: destructuring `undefined` would
        // throw a TypeError before the best-effort try block is entered.
        const { count = 10 } = options ?? {};
        // Baidu search URL
        const params = new URLSearchParams({
            wd: query,
            rn: String(Math.min(count, 50)),
            ie: 'utf-8',
        });
        const url = `https://www.baidu.com/s?${params}`;
        try {
            // Third argument is presumably a timeout in ms — confirm against simpleFetch.
            const response = await simpleFetch(url, 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36', 15000);
            if (!response.html)
                return [];
            const $ = load(response.html);
            const results = [];
            const seen = new Set();
            // Baidu result selectors: .result or .c-container
            $('.result, .c-container').each((_, elem) => {
                const el = $(elem);
                const linkEl = el.find('h3 a, .t a').first();
                const title = linkEl.text().trim();
                // Prefer the container's `mu` attribute and fall back to the
                // anchor href (which may be a Baidu redirect link).
                const href = el.attr('mu') || linkEl.attr('href') || '';
                const snippet = el.find('.c-abstract, .c-span-last, .content-right_8Zs40').first().text().trim();
                if (title && href && !seen.has(href)) {
                    seen.add(href);
                    results.push({ title, url: href, snippet });
                }
            });
            return results.slice(0, count);
        }
        catch {
            // Deliberate best-effort: callers receive an empty list on failure.
            return [];
        }
    }
}
|
|
47
|
+
// ── Naver Search (Korea) ──────────────────────────────────────────────────
|
|
48
|
+
/**
 * Naver (Korea) web search provider.
 *
 * Loads the Naver results page through `peel()` with rendering enabled and
 * extracts results with cheerio. Best-effort: any error yields `[]`.
 */
export class NaverSearchProvider {
    id = 'naver';
    requiresApiKey = false;
    /**
     * Search Naver for `query`.
     *
     * @param {string} query - Search terms, sent as the `query` parameter.
     * @param {{ count?: number } | null} [options] - `count` is the maximum
     *   number of results returned (default 10). It is applied only when
     *   slicing; no page-size parameter is sent to Naver.
     * @returns {Promise<Array<{ title: string, url: string, snippet: string }>>}
     *   Results de-duplicated by URL, or `[]` on any failure.
     */
    async searchWeb(query, options) {
        // Tolerate a missing/null options bag: destructuring `undefined` would
        // throw a TypeError before the best-effort try block is entered.
        const { count = 10 } = options ?? {};
        const params = new URLSearchParams({
            query: query,
            where: 'web',
        });
        const url = `https://search.naver.com/search.naver?${params}`;
        try {
            // Naver is heavily JS-rendered — use peel with render for full content
            const { peel } = await import('../index.js');
            const result = await peel(url, { render: true, format: 'html', wait: 2000, timeout: 15000 });
            const html = result.content || '';
            if (!html)
                return [];
            const $ = load(html);
            const results = [];
            const seen = new Set();
            // Naver uses .type-web class for web results, titles in *-title-text classes
            $('.type-web').each((_, elem) => {
                const el = $(elem);
                // Look the container up once; fall back to the direct parent
                // when no list item / section / *item* ancestor matches.
                const container = el.closest('li, section, [class*=item]');
                const parent = container.length ? container : el.parent();
                const title = parent.find('[class*="title-text"], [class*="Title"]').first().text().trim();
                const href = parent.find('a[href^="http"]').first().attr('href') || '';
                const snippet = parent.find('[class*="text-type-body"], [class*="desc"]').first().text().trim();
                if (title && href && !href.includes('naver.com/search') && !seen.has(href)) {
                    seen.add(href);
                    results.push({ title, url: href, snippet });
                }
            });
            // Fallback: try generic link extraction if .type-web yielded nothing
            if (results.length === 0) {
                $('a[href^="http"]').each((_, elem) => {
                    const el = $(elem);
                    const href = el.attr('href') || '';
                    const title = el.text().trim();
                    // Keep only external links whose text looks like a title.
                    if (title.length > 5 && title.length < 200 && href && !href.includes('naver.com') && !seen.has(href)) {
                        seen.add(href);
                        results.push({ title, url: href, snippet: '' });
                    }
                });
            }
            return results.slice(0, count);
        }
        catch {
            // Deliberate best-effort: callers receive an empty list on failure.
            return [];
        }
    }
}
|
|
99
|
+
// ── Yahoo Japan Search ────────────────────────────────────────────────────
|
|
100
|
+
/**
 * Yahoo! Japan web search provider.
 *
 * Requests the results page through `simpleFetch` and extracts results with
 * cheerio. Best-effort: any error while fetching or parsing yields `[]`.
 */
export class YahooJapanSearchProvider {
    id = 'yahoo_japan';
    requiresApiKey = false;
    /**
     * Search Yahoo! Japan for `query`.
     *
     * @param {string} query - Search terms, sent as the `p` parameter.
     * @param {{ count?: number } | null} [options] - `count` is the maximum
     *   number of results returned (default 10). The `n` value sent to Yahoo
     *   is capped at 50.
     * @returns {Promise<Array<{ title: string, url: string, snippet: string }>>}
     *   Results de-duplicated by URL, or `[]` on any failure.
     */
    async searchWeb(query, options) {
        // Tolerate a missing/null options bag: destructuring `undefined` would
        // throw a TypeError before the best-effort try block is entered.
        const { count = 10 } = options ?? {};
        const params = new URLSearchParams({
            p: query,
            n: String(Math.min(count, 50)),
            ei: 'UTF-8',
        });
        const url = `https://search.yahoo.co.jp/search?${params}`;
        try {
            const response = await simpleFetch(url, 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36', 15000);
            if (!response.html)
                return [];
            const $ = load(response.html);
            const results = [];
            // Yahoo Japan result selectors (2026 layout uses sw-Card components)
            const seen = new Set();
            $('.sw-Card__title, .algo, .dd').each((_, elem) => {
                const el = $(elem);
                // Walk up to the card container to find the link and snippet.
                // NOTE(review): cheerio's closest() tests the element itself
                // first, so a `.sw-Card__title` match already satisfies
                // `[class*="sw-Card"]` and `card` may be the title element
                // rather than the enclosing card — confirm against live markup.
                const container = el.closest('[class*="sw-Card"], .algo, .dd, li');
                const card = container.length ? container : el.parent();
                const title = el.find('.sw-Card__titleMain, h3, a').first().text().trim() || el.text().trim();
                const href = card.find('a[href^="http"]').first().attr('href') || '';
                const snippet = card.find('.sw-Card__description, .sw-Card__floatText, p').first().text().trim();
                // Filter Yahoo internal links
                if (title && title.length > 3 && href && !href.includes('yahoo.co.jp/search') && !href.includes('cache.yahoofs') && !seen.has(href)) {
                    seen.add(href);
                    results.push({ title, url: href, snippet });
                }
            });
            return results.slice(0, count);
        }
        catch {
            // Deliberate best-effort: callers receive an empty list on failure.
            return [];
        }
    }
}
|
|
141
|
+
// ── Yandex Search ──────────────────────────────────────────────────────────
|
|
142
|
+
/**
 * Turn an href scraped from a Yandex result into an absolute URL.
 *
 * @param {string} href - Raw `href` attribute value (non-empty).
 * @returns {string | null} Absolute URL, or `null` for site-relative paths,
 *   which can only point back into Yandex itself.
 */
function normalizeYandexHref(href) {
    if (href.startsWith('http'))
        return href;
    // Protocol-relative link: add the scheme only (avoids "https:////host").
    if (href.startsWith('//'))
        return `https:${href}`;
    // Site-relative path: an internal Yandex link, never a search result.
    if (href.startsWith('/'))
        return null;
    // Scheme-less "host/path" form.
    return `https://${href}`;
}
/**
 * Yandex web search provider.
 *
 * Requests the Yandex HTML results page through `simpleFetch` and extracts
 * results with cheerio. Best-effort: any error yields `[]`.
 */
export class YandexSearchProvider {
    id = 'yandex';
    requiresApiKey = false;
    /**
     * Search Yandex for `query`.
     *
     * @param {string} query - Search terms, sent as the `text` parameter.
     * @param {{ count?: number } | null} [options] - `count` is the maximum
     *   number of results returned (default 10). The `numdoc` value sent to
     *   Yandex is capped at 50.
     * @returns {Promise<Array<{ title: string, url: string, snippet: string }>>}
     *   Results de-duplicated by href, or `[]` on any failure.
     */
    async searchWeb(query, options) {
        // Tolerate a missing/null options bag: destructuring `undefined` would
        // throw a TypeError before the best-effort try block is entered.
        const { count = 10 } = options ?? {};
        const params = new URLSearchParams({
            text: query,
            numdoc: String(Math.min(count, 50)),
            lr: '84', // US region; hard-coded — no option currently overrides it
        });
        // Use Yandex HTML search
        const url = `https://yandex.com/search/?${params}`;
        try {
            const response = await simpleFetch(url, 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36', 15000);
            if (!response.html)
                return [];
            const $ = load(response.html);
            const results = [];
            const seen = new Set();
            // Yandex result selectors
            $('.serp-item, .organic').each((_, elem) => {
                const el = $(elem);
                const linkEl = el.find('.organic__url, .link, a[href]').first();
                const title = el.find('.organic__title, .OrganicTitle-LinkText, h2').first().text().trim();
                const href = linkEl.attr('href') || '';
                const snippet = el.find('.organic__text, .OrganicText, .text-container').first().text().trim();
                // Filter internal Yandex links and already-seen hrefs
                if (!title || !href || href.includes('yandex.') || seen.has(href))
                    return;
                const fullUrl = normalizeYandexHref(href);
                if (fullUrl === null)
                    return;
                seen.add(href);
                results.push({ title, url: fullUrl, snippet });
            });
            return results.slice(0, count);
        }
        catch {
            // Deliberate best-effort: callers receive an empty list on failure.
            return [];
        }
    }
}
|
|
@@ -12,7 +12,7 @@
|
|
|
12
12
|
* In production with no API keys configured, getBestSearchProvider() returns
|
|
13
13
|
* StealthSearchProvider since DDG HTTP is often blocked on datacenter IPs.
|
|
14
14
|
*/
|
|
15
|
-
export type SearchProviderId = 'duckduckgo' | 'brave' | 'stealth' | 'google';
|
|
15
|
+
export type SearchProviderId = 'duckduckgo' | 'brave' | 'stealth' | 'google' | 'baidu' | 'yandex' | 'naver' | 'yahoo_japan';
|
|
16
16
|
export interface WebSearchResult {
|
|
17
17
|
title: string;
|
|
18
18
|
url: string;
|
|
@@ -21,6 +21,8 @@ export interface WebSearchResult {
|
|
|
21
21
|
relevanceScore?: number;
|
|
22
22
|
/** Thumbnail/image URL from SearXNG results (img_src or thumbnail field). */
|
|
23
23
|
imageUrl?: string;
|
|
24
|
+
/** Structured SERP data when structured=true is passed in WebSearchOptions. */
|
|
25
|
+
serp?: import('./google-serp-parser.js').GoogleSerpResult;
|
|
24
26
|
}
|
|
25
27
|
export interface WebSearchOptions {
|
|
26
28
|
/** Number of results (1-10) */
|
|
@@ -42,6 +44,8 @@ export interface WebSearchOptions {
|
|
|
42
44
|
locale?: string;
|
|
43
45
|
/** Optional AbortSignal */
|
|
44
46
|
signal?: AbortSignal;
|
|
47
|
+
/** Return structured SERP data (knowledge panel, PAA, featured snippets, etc.) */
|
|
48
|
+
structured?: boolean;
|
|
45
49
|
}
|
|
46
50
|
export interface SearchProvider {
|
|
47
51
|
readonly id: SearchProviderId;
|
|
@@ -1400,8 +1400,18 @@ export class GoogleSearchProvider {
|
|
|
1400
1400
|
const snippet = cleanText(snippetText, { maxLen: 500, stripEllipsisPadding: true });
|
|
1401
1401
|
results.push({ title, url: validated, snippet });
|
|
1402
1402
|
});
|
|
1403
|
-
if (results.length > 0)
|
|
1404
|
-
|
|
1403
|
+
if (results.length > 0) {
|
|
1404
|
+
const sliced = results.slice(0, count);
|
|
1405
|
+
// Attach structured SERP data to the first result when structured=true
|
|
1406
|
+
if (options?.structured) {
|
|
1407
|
+
const { parseGoogleSerp } = await import('./google-serp-parser.js');
|
|
1408
|
+
const serp = parseGoogleSerp(html);
|
|
1409
|
+
if (sliced.length > 0) {
|
|
1410
|
+
sliced[0] = { ...sliced[0], serp };
|
|
1411
|
+
}
|
|
1412
|
+
}
|
|
1413
|
+
return sliced;
|
|
1414
|
+
}
|
|
1405
1415
|
}
|
|
1406
1416
|
}
|
|
1407
1417
|
catch (e) {
|
|
@@ -1536,6 +1546,9 @@ export function getSearchProvider(id) {
|
|
|
1536
1546
|
return new StealthSearchProvider();
|
|
1537
1547
|
if (id === 'google')
|
|
1538
1548
|
return new GoogleSearchProvider();
|
|
1549
|
+
// 'baidu', 'yandex', 'naver' and 'yahoo_japan' are handled by the provider classes exported
|
|
1550
|
+
// from './search-engines.js'. They cannot be imported here (circular dependency).
|
|
1551
|
+
// Import the provider classes from './search-engines.js' directly for these ids; here they fall through to the fallback below.
|
|
1539
1552
|
// Exhaustive fallback (should be unreachable due to typing)
|
|
1540
1553
|
return new DuckDuckGoProvider();
|
|
1541
1554
|
}
|
|
@@ -0,0 +1,53 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Vertical search — specialized endpoints for shopping, news, images, videos.
|
|
3
|
+
* Uses Google vertical search pages + cheerio parsing.
|
|
4
|
+
*/
|
|
5
|
+
/** One product card scraped by `searchShopping`. */
export interface ShoppingResult {
    /** Product title text. */
    title: string;
    /** Price text exactly as scraped (not parsed); omitted when no price element matched. */
    price?: string;
    /** Currency of `price`. NOTE(review): `searchShopping` never sets this field. */
    currency?: string;
    /** Store name text; `'Unknown'` when no store element matched. */
    store: string;
    /** Product link; hrefs starting with `/` are prefixed with `https://www.google.com`. */
    url: string;
    /** Image `src`, only when it starts with `http`. */
    imageUrl?: string;
    /** Rating parsed with `parseFloat`; omitted when missing, unparsable or 0. */
    rating?: number;
    /** Digits of the review text parsed as an integer; omitted when missing or 0. */
    reviewCount?: number;
    /** NOTE(review): `searchShopping` never sets this field. */
    condition?: string;
}
|
|
16
|
+
/** One article scraped by `searchNews`. */
export interface NewsResult {
    /** Headline text. */
    title: string;
    /** First absolute (`http…`) link found in the result element. */
    url: string;
    /** Publisher text; `'Unknown'` when no source element matched. */
    source: string;
    /** Date text exactly as scraped (not parsed). */
    date?: string;
    /** Snippet text, when present. */
    snippet?: string;
    /** First `img` whose `src` starts with `http`, when present. */
    imageUrl?: string;
    /** NOTE(review): `searchNews` never sets this field. */
    category?: string;
}
|
|
25
|
+
/** One image scraped by `searchImages` (Bing Images). */
export interface ImageResult {
    /** `t` from the element's `m` JSON attribute, else the `img` alt text; may be empty. */
    title: string;
    /** `purl` from the `m` JSON, or the first link href in the fallback path; may be empty. */
    url: string;
    /** `murl` (else `turl`) from the `m` JSON, or the `img` `src`/`data-src` in the fallback path. */
    imageUrl: string;
    /** `w` from the `m` JSON, when present. */
    width?: number;
    /** `h` from the `m` JSON, when present. */
    height?: number;
    /** `desc` from the `m` JSON, when present. */
    source?: string;
}
|
|
33
|
+
/** One video scraped by `searchVideos`. */
export interface VideoResult {
    /** Title text. */
    title: string;
    /** First absolute (`http…`) link found in the result element. */
    url: string;
    /** `'YouTube'`, `'Vimeo'` or `'Web'`, derived from substrings of `url`. */
    platform: string;
    /** Duration text exactly as scraped. */
    duration?: string;
    /** Date text exactly as scraped (not parsed). */
    date?: string;
    /** First `img` whose `src` starts with `http`, when present. */
    thumbnailUrl?: string;
    /** Channel/uploader text, when present. */
    channel?: string;
    /** NOTE(review): `searchVideos` never sets this field. */
    views?: string;
}
|
|
43
|
+
/** Options shared by the vertical search functions; not every function reads every field. */
export interface VerticalSearchOptions {
    /** Search terms. */
    query: string;
    /** Maximum number of results returned (default 10; 20 for `searchImages`). */
    count?: number;
    /** Country code, lower-cased and forwarded to Google as `gl` by the functions that support it. */
    country?: string;
    /** Language code, forwarded to Google as `hl` by the functions that support it. */
    language?: string;
    /** `searchNews` maps `'day'` / `'week'` / `'month'` to `tbs=qdr:d|w|m`; other values are ignored. */
    freshness?: string;
}
|
|
50
|
+
/** Scrapes Google Shopping (`tbm=shop`) product cards; resolves to `[]` when fetching or parsing the page throws. */
export declare function searchShopping(opts: VerticalSearchOptions): Promise<ShoppingResult[]>;
|
|
51
|
+
/** Scrapes Google News (`tbm=nws`) results; resolves to `[]` when fetching or parsing the page throws. */
export declare function searchNews(opts: VerticalSearchOptions): Promise<NewsResult[]>;
|
|
52
|
+
/** Scrapes Bing Images results; resolves to `[]` when fetching or parsing the page throws. */
export declare function searchImages(opts: VerticalSearchOptions): Promise<ImageResult[]>;
|
|
53
|
+
/** Scrapes Google Videos (`tbm=vid`) results; resolves to `[]` when fetching or parsing the page throws. */
export declare function searchVideos(opts: VerticalSearchOptions): Promise<VideoResult[]>;
|
|
@@ -0,0 +1,231 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Vertical search — specialized endpoints for shopping, news, images, videos.
|
|
3
|
+
* Uses Google vertical search pages + cheerio parsing.
|
|
4
|
+
*/
|
|
5
|
+
import { load } from 'cheerio';
|
|
6
|
+
// ── Shopping Search ────────────────────────────────────────────────────────
|
|
7
|
+
/**
 * Search Google Shopping (`tbm=shop`) and scrape product cards from the
 * rendered page.
 *
 * Best-effort: every failure, including failing to load the `peel` entry
 * point, resolves to `[]` instead of rejecting.
 *
 * @param {{ query: string, count?: number, country?: string, language?: string }} opts
 *   `count` (default 10) caps the returned list; `country` is lower-cased and
 *   sent as `gl`; `language` is sent as `hl`.
 * @returns {Promise<Array<object>>} Product cards with `title`, `price`,
 *   `store`, `url`, `imageUrl`, `rating` and `reviewCount`.
 */
export async function searchShopping(opts) {
    const { query, count = 10, country, language } = opts;
    const params = new URLSearchParams({
        q: query,
        tbm: 'shop', // Google Shopping mode
        // Request twice the wanted amount (max 40), presumably as headroom
        // for cards that are filtered out below.
        num: String(Math.min(count * 2, 40)),
    });
    if (country)
        params.set('gl', country.toLowerCase());
    if (language)
        params.set('hl', language);
    const url = `https://www.google.com/search?${params}`;
    try {
        // Strategy: Use Google Shopping via peel() with render.
        // Imported inside the try so a load failure degrades to [] like
        // every other failure path of this function.
        const { peel } = await import('../index.js');
        const result = await peel(url, {
            render: true,
            stealth: true,
            format: 'html',
            wait: 3000,
            timeout: 15000,
        });
        const html = result.content || '';
        // Very short documents are treated as "no usable page".
        if (!html || html.length < 500)
            return [];
        const $ = load(html);
        const items = [];
        // The selectors below can match nested elements of the same card,
        // so identical title+url pairs are emitted only once.
        const seen = new Set();
        // Google Shopping result selectors
        $('.sh-dgr__content, .sh-dlr__list-result, .mnr-c .pla-unit, [data-docid], .KZmu8e').each((_, elem) => {
            const el = $(elem);
            const title = el.find('.tAxDx, .pymv4e, h3, .Xjkr3b').first().text().trim();
            const price = el.find('.a8Pemb, .e10twf, .HRLxBb, .kHxwFf').first().text().trim();
            const store = el.find('.aULzUe, .LbUacb, .dD8iuc, .IuHnof').first().text().trim();
            const link = el.find('a[href]').first().attr('href') || '';
            const img = el.find('img').first().attr('src') || '';
            const ratingText = el.find('.Rsc7Yb, .yi40Hd').first().text().trim();
            const reviewText = el.find('.QhqGkb, .RDApEe').first().text().trim();
            // A card needs a title plus at least a price or a store.
            if (!title || !(price || store))
                return;
            let resolvedUrl = link;
            if (!link.startsWith('http') && link.startsWith('/')) {
                // Site-relative Google link.
                resolvedUrl = `https://www.google.com${link}`;
            }
            const key = `${title}\n${resolvedUrl}`;
            if (seen.has(key))
                return;
            seen.add(key);
            items.push({
                title,
                price: price || undefined,
                store: store || 'Unknown',
                url: resolvedUrl,
                imageUrl: img.startsWith('http') ? img : undefined,
                // NaN (unparsable) and 0 both collapse to undefined.
                rating: Number.parseFloat(ratingText) || undefined,
                reviewCount: Number.parseInt(reviewText.replace(/[^0-9]/g, ''), 10) || undefined,
            });
        });
        return items.slice(0, count);
    }
    catch {
        // Deliberate best-effort: callers receive an empty list on failure.
        return [];
    }
}
|
|
66
|
+
// ── News Search ────────────────────────────────────────────────────────────
|
|
67
|
+
/**
 * Search Google News (`tbm=nws`) and scrape articles from the rendered page.
 *
 * Best-effort: every failure, including failing to load the `peel` entry
 * point, resolves to `[]` instead of rejecting.
 *
 * @param {{ query: string, count?: number, country?: string, language?: string, freshness?: string }} opts
 *   `count` (default 10) caps the returned list; `country` is lower-cased and
 *   sent as `gl`; `language` is sent as `hl`; `freshness` of `'day'`,
 *   `'week'` or `'month'` becomes `tbs=qdr:d|w|m` (other values are ignored).
 * @returns {Promise<Array<object>>} Articles with `title`, `url`, `source`,
 *   `date`, `snippet` and `imageUrl`, de-duplicated by URL.
 */
export async function searchNews(opts) {
    const { query, count = 10, country, language, freshness } = opts;
    const params = new URLSearchParams({
        q: query,
        tbm: 'nws', // Google News mode
        num: String(Math.min(count * 2, 40)),
    });
    // Honour `country` the same way searchShopping does.
    if (country)
        params.set('gl', country.toLowerCase());
    if (language)
        params.set('hl', language);
    const tbs = new Map([
        ['day', 'qdr:d'],
        ['week', 'qdr:w'],
        ['month', 'qdr:m'],
    ]).get(freshness);
    if (tbs)
        params.set('tbs', tbs);
    const url = `https://www.google.com/search?${params}`;
    try {
        // Imported inside the try so a load failure degrades to [] like
        // every other failure path of this function.
        const { peel } = await import('../index.js');
        const result = await peel(url, {
            render: true,
            stealth: true,
            format: 'html',
            wait: 3000,
            timeout: 15000,
        });
        const html = result.content || '';
        // Very short documents are treated as "no usable page".
        if (!html || html.length < 500)
            return [];
        const $ = load(html);
        const items = [];
        // Several selectors may match the same article; keep one per URL.
        const seen = new Set();
        // Google News result selectors
        $('.WlydOe, .JJZKK, .SoaBEf, .dbsr, [jscontroller="d0DtYd"]').each((_, elem) => {
            const el = $(elem);
            const title = el.find('[role="heading"], .mCBkyc, .nDgy9d, .JheGif').first().text().trim();
            const link = el.find('a[href^="http"]').first().attr('href') || '';
            const source = el.find('.NUnG9d, .CEMjEf, .XTjFC, .wEwyrc').first().text().trim();
            const date = el.find('.OSrXXb, .WG9SHc, .f').first().text().trim();
            const snippet = el.find('.GI74Re, .Y3v8qd, .VwiC3b').first().text().trim();
            const img = el.find('img[src^="http"]').first().attr('src') || '';
            if (!title || !link || seen.has(link))
                return;
            seen.add(link);
            items.push({
                title,
                url: link,
                source: source || 'Unknown',
                date: date || undefined,
                snippet: snippet || undefined,
                imageUrl: img || undefined,
            });
        });
        return items.slice(0, count);
    }
    catch {
        // Deliberate best-effort: callers receive an empty list on failure.
        return [];
    }
}
|
|
123
|
+
// ── Image Search ────────────────────────────────────────────────────────────
|
|
124
|
+
/**
 * Search Bing Images and scrape image entries from the rendered page.
 *
 * Best-effort: every failure, including failing to load the `peel` entry
 * point, resolves to `[]` instead of rejecting.
 *
 * @param {{ query: string, count?: number }} opts - `count` (default 20)
 *   caps the returned list.
 * @returns {Promise<Array<object>>} Entries with `title`, `url`, `imageUrl`
 *   and, when taken from Bing's `m` attribute, `width`, `height`, `source`.
 *   Entries without an image URL are skipped; duplicates by `imageUrl` are
 *   dropped.
 */
export async function searchImages(opts) {
    const { query, count = 20 } = opts;
    // Use Bing Images (more scrape-friendly than Google Images)
    const params = new URLSearchParams({ q: query, form: 'HDRSC2', first: '1' });
    const url = `https://www.bing.com/images/search?${params}`;
    try {
        // Imported inside the try so a load failure degrades to [] like
        // every other failure path of this function.
        const { peel } = await import('../index.js');
        // Request raw HTML explicitly: the cheerio selectors below need
        // markup, and every other cheerio-parsed peel() call in this package
        // passes `format: 'html'`.
        const result = await peel(url, { render: true, format: 'html', wait: 2000, timeout: 15000 });
        const html = result.content || '';
        // Very short documents are treated as "no usable page".
        if (!html || html.length < 500)
            return [];
        const $ = load(html);
        const items = [];
        // The selectors can match nested elements of one tile; keep one
        // entry per image URL.
        const seen = new Set();
        // Bing Images selectors
        $('.iusc, .imgpt, [data-idx]').each((_, elem) => {
            const el = $(elem);
            // Bing stores image data in a JSON attribute 'm'
            const mData = el.attr('m');
            if (mData) {
                try {
                    const m = JSON.parse(mData);
                    const imageUrl = m.murl || m.turl || '';
                    // An entry without any image URL is useless to callers.
                    if (!imageUrl || seen.has(imageUrl))
                        return;
                    seen.add(imageUrl);
                    items.push({
                        title: m.t || el.find('img').attr('alt') || '',
                        url: m.purl || '',
                        imageUrl,
                        width: m.w || undefined,
                        height: m.h || undefined,
                        source: m.desc || undefined,
                    });
                }
                catch {
                    /* skip malformed JSON */
                }
            }
            else {
                // Fallback: direct img extraction
                const img = el.find('img');
                const imgSrc = img.attr('src') || img.attr('data-src') || '';
                const title = img.attr('alt') || '';
                if (imgSrc && imgSrc.startsWith('http') && !seen.has(imgSrc)) {
                    seen.add(imgSrc);
                    items.push({
                        title,
                        url: el.find('a[href]').first().attr('href') || '',
                        imageUrl: imgSrc,
                    });
                }
            }
        });
        return items.slice(0, count);
    }
    catch {
        // Deliberate best-effort: callers receive an empty list on failure.
        return [];
    }
}
|
|
178
|
+
// ── Video Search ────────────────────────────────────────────────────────────
|
|
179
|
+
/**
 * Search Google Videos (`tbm=vid`) and scrape video results from the
 * rendered page.
 *
 * Best-effort: every failure, including failing to load the `peel` entry
 * point, resolves to `[]` instead of rejecting.
 *
 * @param {{ query: string, count?: number, country?: string, language?: string }} opts
 *   `count` (default 10) caps the returned list; `country` is lower-cased and
 *   sent as `gl`; `language` is sent as `hl`.
 * @returns {Promise<Array<object>>} Videos with `title`, `url`, `platform`
 *   (`'YouTube'`, `'Vimeo'` or `'Web'`), `duration`, `date`, `channel` and
 *   `thumbnailUrl`, de-duplicated by URL.
 */
export async function searchVideos(opts) {
    const { query, count = 10, country, language } = opts;
    const params = new URLSearchParams({
        q: query,
        tbm: 'vid', // Google Videos mode
        num: String(Math.min(count * 2, 20)),
    });
    // Honour `country` / `language` the same way searchShopping does.
    if (country)
        params.set('gl', country.toLowerCase());
    if (language)
        params.set('hl', language);
    const url = `https://www.google.com/search?${params}`;
    try {
        // Imported inside the try so a load failure degrades to [] like
        // every other failure path of this function.
        const { peel } = await import('../index.js');
        const result = await peel(url, {
            render: true,
            stealth: true,
            format: 'html',
            wait: 3000,
            timeout: 15000,
        });
        const html = result.content || '';
        // Very short documents are treated as "no usable page".
        if (!html || html.length < 500)
            return [];
        const $ = load(html);
        const items = [];
        // The selectors can match nested elements of one result; keep one
        // entry per URL.
        const seen = new Set();
        // Google Video result selectors
        $('[data-surl], .dXiKIc, .g, .RzdJxc').each((_, elem) => {
            const el = $(elem);
            const title = el.find('h3, .fc9yUc, [aria-label]').first().text().trim();
            const link = el.find('a[href^="http"]').first().attr('href') || '';
            const duration = el.find('.J1mWY, .FGpTBd, .vdur').first().text().trim();
            const date = el.find('.OSrXXb, .f').first().text().trim();
            const channel = el.find('.pcJO7e, .GlPvmc').first().text().trim();
            const thumb = el.find('img[src^="http"]').first().attr('src') || '';
            // Skip links back into Google search and already-seen URLs.
            if (!title || !link || link.includes('google.com/search') || seen.has(link))
                return;
            seen.add(link);
            let platform = 'Web';
            // `youtu.be` is YouTube's short-link domain.
            if (link.includes('youtube') || link.includes('youtu.be'))
                platform = 'YouTube';
            else if (link.includes('vimeo'))
                platform = 'Vimeo';
            items.push({
                title,
                url: link,
                platform,
                duration: duration || undefined,
                date: date || undefined,
                channel: channel || undefined,
                thumbnailUrl: thumb || undefined,
            });
        });
        return items.slice(0, count);
    }
    catch {
        // Deliberate best-effort: callers receive an empty list on failure.
        return [];
    }
}
|
package/dist/index.d.ts
CHANGED
|
@@ -18,7 +18,10 @@ export { extractInlineJson, type InlineExtractOptions, type InlineExtractResult
|
|
|
18
18
|
export { runAgent, type AgentOptions, type AgentResult, type AgentProgress, type AgentStreamEvent, type AgentDepth, type AgentTopic } from './core/agent.js';
|
|
19
19
|
export { summarizeContent, type SummarizeOptions } from './core/summarize.js';
|
|
20
20
|
export { getSearchProvider, DuckDuckGoProvider, BraveSearchProvider, providerStats, type SearchProvider, type SearchProviderId, type WebSearchResult, type WebSearchOptions, } from './core/search-provider.js';
|
|
21
|
+
export { BaiduSearchProvider, YandexSearchProvider, NaverSearchProvider, YahooJapanSearchProvider } from './core/search-engines.js';
|
|
22
|
+
export { crossVerifySearch, type CrossVerifyResult } from './core/cross-verify.js';
|
|
21
23
|
export { answerQuestion, type AnswerRequest, type AnswerResponse, type AnswerCitation, type LLMProviderId, type TokensUsed, } from './core/answer.js';
|
|
24
|
+
export { parseGoogleSerp, type GoogleSerpResult } from './core/google-serp-parser.js';
|
|
22
25
|
export { searchJobs, type JobCard, type JobDetail, type JobSearchOptions, type JobSearchResult } from './core/jobs.js';
|
|
23
26
|
export { RateGovernor, formatDuration, type RateConfig, type RateState, type CanApplyResult, } from './core/rate-governor.js';
|
|
24
27
|
export { ApplicationTracker, type ApplicationRecord, type ApplicationFilter, type ApplicationStats, type ApplicationStatus, } from './core/application-tracker.js';
|
|
@@ -52,6 +55,8 @@ export { localSearch, type LocalSearchOptions, type LocalSearchResult, type Loca
|
|
|
52
55
|
export { getBusinessIntel, type BusinessIntel } from './core/business-intel.js';
|
|
53
56
|
export { CircuitBreaker, browserCircuitBreaker, type CircuitState } from './core/circuit-breaker.js';
|
|
54
57
|
export { checkMemoryPressure } from './core/browser-pool.js';
|
|
58
|
+
export { searchShopping, searchNews, searchImages, searchVideos } from './core/vertical-search.js';
|
|
59
|
+
export type { ShoppingResult, NewsResult, ImageResult, VideoResult, VerticalSearchOptions } from './core/vertical-search.js';
|
|
55
60
|
/**
|
|
56
61
|
* Fetch and extract content from a URL
|
|
57
62
|
*
|
package/dist/index.js
CHANGED
|
@@ -21,7 +21,10 @@ export { extractInlineJson } from './core/extract-inline.js';
|
|
|
21
21
|
export { runAgent } from './core/agent.js';
|
|
22
22
|
export { summarizeContent } from './core/summarize.js';
|
|
23
23
|
export { getSearchProvider, DuckDuckGoProvider, BraveSearchProvider, providerStats, } from './core/search-provider.js';
|
|
24
|
+
export { BaiduSearchProvider, YandexSearchProvider, NaverSearchProvider, YahooJapanSearchProvider } from './core/search-engines.js';
|
|
25
|
+
export { crossVerifySearch } from './core/cross-verify.js';
|
|
24
26
|
export { answerQuestion, } from './core/answer.js';
|
|
27
|
+
export { parseGoogleSerp } from './core/google-serp-parser.js';
|
|
25
28
|
export { searchJobs } from './core/jobs.js';
|
|
26
29
|
export { RateGovernor, formatDuration, } from './core/rate-governor.js';
|
|
27
30
|
export { ApplicationTracker, } from './core/application-tracker.js';
|
|
@@ -60,6 +63,7 @@ export { localSearch } from './core/local-search.js';
|
|
|
60
63
|
export { getBusinessIntel } from './core/business-intel.js';
|
|
61
64
|
export { CircuitBreaker, browserCircuitBreaker } from './core/circuit-breaker.js';
|
|
62
65
|
export { checkMemoryPressure } from './core/browser-pool.js';
|
|
66
|
+
export { searchShopping, searchNews, searchImages, searchVideos } from './core/vertical-search.js';
|
|
63
67
|
/**
|
|
64
68
|
* Fetch and extract content from a URL
|
|
65
69
|
*
|
package/dist/server/app.js
CHANGED
|
@@ -19,7 +19,7 @@ import { createRateLimitMiddleware, RateLimiter } from './middleware/rate-limit.
|
|
|
19
19
|
import { createHealthRouter } from './routes/health.js';
|
|
20
20
|
import { createFetchRouter } from './routes/fetch.js';
|
|
21
21
|
import { createSearchRouter } from './routes/search.js';
|
|
22
|
-
import { createSmartSearchRouter } from './routes/smart-search.js';
|
|
22
|
+
import { createSmartSearchRouter } from './routes/smart-search/index.js';
|
|
23
23
|
import { createUserRouter } from './routes/users.js';
|
|
24
24
|
import { createStripeRouter, createBillingPortalRouter } from './routes/stripe.js';
|
|
25
25
|
import { createOAuthRouter } from './routes/oauth.js';
|