webpeel 0.21.89 → 0.22.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/core/cross-verify.d.ts +27 -0
- package/dist/core/cross-verify.js +93 -0
- package/dist/core/google-serp-parser.d.ts +82 -0
- package/dist/core/google-serp-parser.js +287 -0
- package/dist/core/search-engines.d.ts +25 -0
- package/dist/core/search-engines.js +182 -0
- package/dist/core/search-provider.d.ts +5 -1
- package/dist/core/search-provider.js +15 -2
- package/dist/core/vertical-search.d.ts +53 -0
- package/dist/core/vertical-search.js +231 -0
- package/dist/index.d.ts +5 -0
- package/dist/index.js +4 -0
- package/dist/server/app.js +1 -1
- package/dist/server/routes/search.js +199 -3
- package/dist/server/routes/smart-search/handlers/cars.d.ts +2 -0
- package/dist/server/routes/smart-search/handlers/cars.js +99 -0
- package/dist/server/routes/smart-search/handlers/flights.d.ts +2 -0
- package/dist/server/routes/smart-search/handlers/flights.js +69 -0
- package/dist/server/routes/smart-search/handlers/general.d.ts +2 -0
- package/dist/server/routes/smart-search/handlers/general.js +390 -0
- package/dist/server/routes/smart-search/handlers/hotels.d.ts +2 -0
- package/dist/server/routes/smart-search/handlers/hotels.js +85 -0
- package/dist/server/routes/smart-search/handlers/products.d.ts +2 -0
- package/dist/server/routes/smart-search/handlers/products.js +213 -0
- package/dist/server/routes/smart-search/handlers/rental.d.ts +2 -0
- package/dist/server/routes/smart-search/handlers/rental.js +151 -0
- package/dist/server/routes/smart-search/handlers/restaurants.d.ts +2 -0
- package/dist/server/routes/smart-search/handlers/restaurants.js +205 -0
- package/dist/server/routes/smart-search/index.d.ts +19 -0
- package/dist/server/routes/smart-search/index.js +508 -0
- package/dist/server/routes/smart-search/intent.d.ts +3 -0
- package/dist/server/routes/smart-search/intent.js +109 -0
- package/dist/server/routes/smart-search/llm.d.ts +8 -0
- package/dist/server/routes/smart-search/llm.js +101 -0
- package/dist/server/routes/smart-search/sources/reddit.d.ts +18 -0
- package/dist/server/routes/smart-search/sources/reddit.js +34 -0
- package/dist/server/routes/smart-search/sources/yelp.d.ts +25 -0
- package/dist/server/routes/smart-search/sources/yelp.js +171 -0
- package/dist/server/routes/smart-search/sources/youtube.d.ts +8 -0
- package/dist/server/routes/smart-search/sources/youtube.js +9 -0
- package/dist/server/routes/smart-search/types.d.ts +30 -0
- package/dist/server/routes/smart-search/types.js +1 -0
- package/dist/server/routes/smart-search/utils.d.ts +12 -0
- package/dist/server/routes/smart-search/utils.js +97 -0
- package/package.json +1 -1
|
@@ -0,0 +1,27 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Cross-source verification — search multiple engines, compare results,
|
|
3
|
+
* compute consensus/confidence scores.
|
|
4
|
+
*/
|
|
5
|
+
import type { WebSearchResult } from './search-provider.js';
|
|
6
|
+
/**
 * Outcome of running one query against several search engines and
 * comparing what they returned.
 */
export interface CrossVerifyResult {
    /** The query string that was searched. */
    query: string;
    /** One entry per engine queried; an engine that failed or timed out reports zero results. */
    sources: {
        /** Engine identifier as passed in `options.engines`. */
        engine: string;
        /** Number of results the engine returned (0 on failure). */
        resultCount: number;
        /** The engine's results, truncated to the requested count. */
        topResults: WebSearchResult[];
    }[];
    /** URLs found across engines, sorted by agreement (highest first), then by average position. */
    consensus: {
        /** Normalized URL key: hostname without a leading `www.` plus the path; no scheme or query string. */
        url: string;
        /** Title taken from the first result seen for this URL. */
        title: string;
        /** Engines whose results contained this URL. */
        appearsIn: string[];
        /** `appearsIn.length` divided by the number of engines that returned at least one result. */
        agreementScore: number;
        /** Mean 1-based rank of this URL across the engines listing it. */
        averagePosition: number;
    }[];
    /** Mean `agreementScore` of the top five consensus entries, rounded to two decimals. */
    confidence: number;
    /** Number of engines that returned at least one result. */
    totalSources: number;
    /** Wall-clock duration of the whole operation in milliseconds. */
    elapsed: number;
}
|
|
24
|
+
/**
 * Searches several engines in parallel and reports which URLs they agree on.
 *
 * @param query - Search query sent to every engine.
 * @param options - `engines`: engine ids to query (the implementation
 *   defaults to `duckduckgo`, `google` and `baidu`); `count`: results
 *   requested per engine (defaults to 10).
 * @returns Per-engine results plus consensus and confidence scores. An
 *   engine that fails or times out is reported with zero results instead
 *   of rejecting the returned promise.
 */
export declare function crossVerifySearch(
    query: string,
    options?: { engines?: string[]; count?: number },
): Promise<CrossVerifyResult>;
|
|
@@ -0,0 +1,93 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Cross-source verification — search multiple engines, compare results,
|
|
3
|
+
* compute consensus/confidence scores.
|
|
4
|
+
*/
|
|
5
|
+
/**
 * Searches several engines in parallel and computes how strongly they agree.
 *
 * @param query   Search query sent to every engine.
 * @param options Optional settings:
 *   - `engines`: engine ids to query (default `duckduckgo`, `google`, `baidu`);
 *     duplicate ids are queried once.
 *   - `count`: results requested per engine (default 10).
 * @returns `{ query, sources, consensus, confidence, totalSources, elapsed }`.
 *   An engine that throws or exceeds the per-engine timeout is reported with
 *   `resultCount: 0` and an empty `topResults`; this function does not reject
 *   because a single engine failed.
 */
export async function crossVerifySearch(query, options) {
    const ENGINE_TIMEOUT_MS = 10000;
    // A repeated engine id would be searched twice and would count as two
    // independent "votes", so collapse duplicates up front.
    const engines = [...new Set(options?.engines || ['duckduckgo', 'google', 'baidu'])];
    const count = options?.count || 10;
    const t0 = Date.now();
    // Import providers dynamically to avoid circular deps
    const { getSearchProvider } = await import('./search-provider.js');
    const { BaiduSearchProvider, YandexSearchProvider, NaverSearchProvider, YahooJapanSearchProvider } = await import('./search-engines.js');
    // Engines implemented in search-engines.js; every other id is resolved
    // through getSearchProvider().
    const extraProviders = new Map([
        ['baidu', BaiduSearchProvider],
        ['yandex', YandexSearchProvider],
        ['naver', NaverSearchProvider],
        ['yahoo_japan', YahooJapanSearchProvider],
    ]);
    const searchEngine = async (engineId) => {
        let timer;
        try {
            const Provider = extraProviders.get(engineId);
            const provider = Provider ? new Provider() : getSearchProvider(engineId);
            const timeout = new Promise((_, reject) => {
                timer = setTimeout(() => reject(new Error('timeout')), ENGINE_TIMEOUT_MS);
            });
            const results = await Promise.race([provider.searchWeb(query, { count }), timeout]);
            return { engine: engineId, resultCount: results.length, topResults: results.slice(0, count) };
        }
        catch {
            // Best effort: a failing engine simply contributes no results.
            return { engine: engineId, resultCount: 0, topResults: [] };
        }
        finally {
            // Without this the pending timer keeps the event loop alive for the
            // full timeout even after the search has already completed.
            clearTimeout(timer);
        }
    };
    // Search all engines in parallel
    const sources = await Promise.all(engines.map(searchEngine));
    // Build consensus — find URLs that appear across multiple engines
    const urlMap = new Map();
    for (const source of sources) {
        // Normalization drops query strings, so several results of ONE engine
        // can collapse to the same key. Count each engine once per key (at its
        // best rank) so agreementScore can never exceed 1.
        const seenInSource = new Set();
        for (let i = 0; i < source.topResults.length; i++) {
            const r = source.topResults[i];
            if (!r || typeof r.url !== 'string')
                continue; // malformed result — nothing to compare
            const normalizedUrl = normalizeUrl(r.url);
            if (seenInSource.has(normalizedUrl))
                continue;
            seenInSource.add(normalizedUrl);
            const existing = urlMap.get(normalizedUrl);
            if (existing) {
                existing.engines.push(source.engine);
                existing.positions.push(i + 1);
            }
            else {
                urlMap.set(normalizedUrl, {
                    title: r.title,
                    engines: [source.engine],
                    positions: [i + 1],
                });
            }
        }
    }
    // Only engines that actually returned something take part in the score.
    const activeSources = sources.filter((s) => s.resultCount > 0);
    // Sort by agreement (most engines first), then by average position
    const consensus = [...urlMap.entries()]
        .map(([url, data]) => ({
            url,
            title: data.title,
            appearsIn: data.engines,
            agreementScore: activeSources.length > 0
                ? data.engines.length / activeSources.length
                : 0,
            averagePosition: data.positions.reduce((a, b) => a + b, 0) / data.positions.length,
        }))
        .sort((a, b) => b.agreementScore - a.agreementScore || a.averagePosition - b.averagePosition);
    // Overall confidence = average agreement of top 5 results
    const top5Agreement = consensus.slice(0, 5);
    const confidence = top5Agreement.length > 0
        ? top5Agreement.reduce((sum, r) => sum + r.agreementScore, 0) / top5Agreement.length
        : 0;
    return {
        query,
        sources,
        consensus,
        confidence: Math.round(confidence * 100) / 100,
        totalSources: activeSources.length,
        elapsed: Date.now() - t0,
    };
}
|
|
85
|
+
/**
 * Reduces a URL to a comparison key: hostname without a leading `www.`
 * followed by the path without one trailing slash. Scheme, port, query
 * string and fragment are dropped. Input that cannot be parsed as a URL is
 * returned unchanged.
 */
function normalizeUrl(url) {
    let parsed;
    try {
        parsed = new URL(url);
    }
    catch {
        return url;
    }
    const host = parsed.hostname.startsWith('www.')
        ? parsed.hostname.slice('www.'.length)
        : parsed.hostname;
    const path = parsed.pathname.endsWith('/')
        ? parsed.pathname.slice(0, -1)
        : parsed.pathname;
    return host + path;
}
|
|
@@ -0,0 +1,82 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Google SERP Parser — extracts rich structured data from Google search HTML.
|
|
3
|
+
* Supports organic results, knowledge panel, PAA, featured snippets,
|
|
4
|
+
* related searches, shopping, news, images, videos, and local pack.
|
|
5
|
+
*/
|
|
6
|
+
/**
 * Structured data extracted from a Google results page. Only
 * `organicResults` is always present; every other section is optional.
 */
export interface GoogleSerpResult {
    /** Regular web results in page order. */
    organicResults: {
        /** Rank of the result within the organic list. */
        position: number;
        title: string;
        url: string;
        snippet: string;
        /** Sub-links shown beneath the result, when any. */
        sitelinks?: {
            title: string;
            url: string;
        }[];
        /** Date text shown with the result, as displayed. */
        date?: string;
        cachedUrl?: string;
    }[];
    /** Entity panel for the query. */
    knowledgePanel?: {
        title: string;
        type?: string;
        description?: string;
        /** Link text of the description's source. */
        source?: string;
        sourceUrl?: string;
        /** Label → value facts listed in the panel. */
        attributes?: Record<string, string>;
        imageUrl?: string;
    };
    /** "People also ask" questions. */
    peopleAlsoAsk?: {
        question: string;
        snippet?: string;
        source?: string;
        sourceUrl?: string;
    }[];
    /** Highlighted answer box. */
    featuredSnippet?: {
        text: string;
        source: string;
        sourceUrl: string;
        /** Layout of the answer. */
        type: 'paragraph' | 'list' | 'table';
    };
    /** Suggested follow-up queries. */
    relatedSearches?: string[];
    /** Product listings. */
    shoppingResults?: {
        title: string;
        /** Price text as displayed. */
        price?: string;
        /** Store / merchant name. */
        source?: string;
        url?: string;
        imageUrl?: string;
        rating?: number;
        reviewCount?: number;
    }[];
    /** News stories. */
    newsResults?: {
        title: string;
        url: string;
        /** Publisher name. */
        source: string;
        date?: string;
        snippet?: string;
        imageUrl?: string;
    }[];
    /** Inline image results. */
    imagePack?: {
        /** Link target of the image tile. */
        url: string;
        /** Address of the image itself. */
        imageUrl: string;
        title?: string;
    }[];
    /** Video results. */
    videoResults?: {
        title: string;
        url: string;
        /** Hosting platform name, e.g. `YouTube`. */
        platform?: string;
        duration?: string;
        date?: string;
        thumbnailUrl?: string;
    }[];
    /** Local business listings. */
    localPack?: {
        name: string;
        address?: string;
        rating?: number;
        reviewCount?: number;
        type?: string;
        phone?: string;
    }[];
    /** Result-count text from the page's statistics line. */
    totalResults?: string;
    /** Search duration text from the page's statistics line. */
    searchTime?: string;
}
|
|
82
|
+
/**
 * Parses the HTML of a Google search results page into structured data.
 *
 * @param html - Raw HTML of the results page.
 * @returns The extracted sections; `organicResults` is always present.
 */
export declare function parseGoogleSerp(html: string): GoogleSerpResult;
|
|
@@ -0,0 +1,287 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Google SERP Parser — extracts rich structured data from Google search HTML.
|
|
3
|
+
* Supports organic results, knowledge panel, PAA, featured snippets,
|
|
4
|
+
* related searches, shopping, news, images, videos, and local pack.
|
|
5
|
+
*/
|
|
6
|
+
import { load } from 'cheerio';
|
|
7
|
+
/**
 * Parses the HTML of a Google search results page into structured data.
 *
 * Each section of the page is handled by its own helper. A section is only
 * added to the result when something was found for it; `organicResults` is
 * always present (possibly empty). Optional fields with no value are omitted
 * rather than set to `undefined`.
 *
 * @param html Raw HTML of the results page.
 * @returns Object with `organicResults` plus any of `knowledgePanel`,
 *   `peopleAlsoAsk`, `featuredSnippet`, `relatedSearches`, `shoppingResults`,
 *   `newsResults`, `imagePack`, `videoResults`, `localPack`, `totalResults`
 *   and `searchTime`.
 */
export function parseGoogleSerp(html) {
    const $ = load(html);
    const result = { organicResults: extractOrganicResults($) };
    const knowledgePanel = extractKnowledgePanel($);
    if (knowledgePanel)
        result.knowledgePanel = knowledgePanel;
    const peopleAlsoAsk = extractPeopleAlsoAsk($);
    if (peopleAlsoAsk.length > 0)
        result.peopleAlsoAsk = peopleAlsoAsk;
    const featuredSnippet = extractFeaturedSnippet($);
    if (featuredSnippet)
        result.featuredSnippet = featuredSnippet;
    const relatedSearches = extractRelatedSearches($);
    if (relatedSearches.length > 0)
        result.relatedSearches = relatedSearches;
    const shoppingResults = extractShoppingResults($);
    if (shoppingResults.length > 0)
        result.shoppingResults = shoppingResults;
    const newsResults = extractNewsResults($);
    if (newsResults.length > 0)
        result.newsResults = newsResults;
    const imagePack = extractImagePack($);
    if (imagePack.length > 0)
        result.imagePack = imagePack;
    const videoResults = extractVideoResults($);
    if (videoResults.length > 0)
        result.videoResults = videoResults;
    const localPack = extractLocalPack($);
    if (localPack.length > 0)
        result.localPack = localPack;
    Object.assign(result, extractResultStats($));
    return result;
}
/** Trimmed text of the first descendant of `scope` matching `selector` ('' when none). */
function firstText(scope, selector) {
    return scope.find(selector).first().text().trim();
}
/** Copy of `fields` without the entries whose value is undefined, null or ''. */
function omitEmpty(fields) {
    return Object.fromEntries(Object.entries(fields).filter(([, value]) => value !== undefined && value !== null && value !== ''));
}
/** Integer made of the digits in `text`; undefined when there are none or the value is 0. */
function parseCount(text) {
    return parseInt(text.replace(/[^0-9]/g, ''), 10) || undefined;
}
/** 1. Organic results — ads, PAA and related-search containers are skipped, URLs deduplicated. */
function extractOrganicResults($) {
    const organic = [];
    const seenUrls = new Set();
    // Multiple selector patterns for resilience across Google HTML variants
    $('#search .g, #rso .g').each((_, elem) => {
        const el = $(elem);
        const isNonOrganic = el.closest('[data-text-ad]').length > 0 ||
            el.closest('.related-question-pair').length > 0 ||
            el.closest('[data-initq]').length > 0 || // related searches
            el.find('[data-text-ad]').length > 0;
        if (isNonOrganic)
            return;
        const title = firstText(el, 'h3');
        const url = el.find('a[href^="http"]').first().attr('href') || '';
        if (!title || !url)
            return;
        // Internal Google search links and ad click tracking are not results.
        if (url.includes('google.com/search') || url.includes('/aclk'))
            return;
        if (seenUrls.has(url))
            return;
        seenUrls.add(url);
        const snippet = firstText(el, '.VwiC3b') ||
            firstText(el, 'span.aCOpRe') ||
            firstText(el, '[data-sncf]') ||
            firstText(el, '[style*="-webkit-line-clamp"]');
        // Sitelinks (sub-links shown under some results)
        const sitelinks = [];
        el.find('.fl a, .sld a, [data-sitelink] a').each((_i, linkElem) => {
            const sTitle = $(linkElem).text().trim();
            const sUrl = $(linkElem).attr('href') || '';
            if (sTitle && sUrl.startsWith('http')) {
                sitelinks.push({ title: sTitle, url: sUrl });
            }
        });
        organic.push({
            // Only accepted results are numbered, so the rank is the list length + 1.
            position: organic.length + 1,
            title,
            url,
            snippet,
            ...omitEmpty({
                sitelinks: sitelinks.length > 0 ? sitelinks : undefined,
                date: firstText(el, '.LEwnzc span, .f'),
            }),
        });
    });
    return organic;
}
/** 2. Knowledge panel — undefined when the page has no panel or the panel has no title. */
function extractKnowledgePanel($) {
    const kp = $('.kp-wholepage, .knowledge-panel, .osrp-blk').first();
    if (!kp.length)
        return undefined;
    const title = firstText(kp, '[data-attrid="title"], h2');
    if (!title)
        return undefined;
    const sourceLink = kp.find('.kno-rdesc a, [data-attrid="description"] a').first();
    const attributes = {};
    kp.find('[data-attrid]').each((_, attrElem) => {
        const row = $(attrElem);
        const key = row.find('.w8qArf, .Z1hOCe').text().trim().replace(/:$/, '');
        const value = row.find('.LrzXr, .kno-fv').text().trim();
        if (key && value && key !== title)
            attributes[key] = value;
    });
    return {
        title,
        ...omitEmpty({
            type: firstText(kp, '[data-attrid="subtitle"], .wwUB2c'),
            description: firstText(kp, '[data-attrid="description"] span, .kno-rdesc span'),
            source: sourceLink.text().trim(),
            sourceUrl: sourceLink.attr('href'),
            attributes: Object.keys(attributes).length > 0 ? attributes : undefined,
            imageUrl: kp.find('g-img img, .kno-ftr img').first().attr('src'),
        }),
    };
}
/** 3. People Also Ask — questions of at least 5 characters, deduplicated; snippets capped at 500 chars. */
function extractPeopleAlsoAsk($) {
    const items = [];
    const seenQuestions = new Set();
    $('.related-question-pair, [data-sgrd="true"]').each((_, elem) => {
        const el = $(elem);
        const dataQ = el.find('[data-q]').attr('data-q');
        // A data-q value of "true" is a flag, not the question text.
        const question = (dataQ !== 'true' ? dataQ?.trim() : '') ||
            el.find('[data-q]').text().trim() ||
            firstText(el, '.CSkcDe') ||
            firstText(el, '[jsname="Cpkphb"] span');
        if (!question || question.length < 5 || seenQuestions.has(question))
            return;
        seenQuestions.add(question);
        const sourceLink = el.find('a[href^="http"]').first();
        items.push({
            question,
            ...omitEmpty({
                snippet: firstText(el, '.wDYxhc, .LGOjhe').slice(0, 500),
                source: sourceLink.text().trim(),
                sourceUrl: sourceLink.attr('href'),
            }),
        });
    });
    return items;
}
/** 4. Featured snippet — undefined unless the answer text is longer than 20 characters; text capped at 1000 chars. */
function extractFeaturedSnippet($) {
    // Try multiple selectors — Google changes these frequently
    const box = $('.xpdopen .hgKElc, .c2xzTb, .IZ6rdc, [data-attrid="wa:/description"] .LGOjhe').first();
    if (!box.length)
        return undefined;
    const text = box.text().trim();
    if (text.length <= 20)
        return undefined;
    const container = box.closest('.g, .xpdopen, [data-hveid]');
    const sourceLink = container.find('a[href^="http"]').first();
    let type = 'paragraph';
    if (box.find('ol, ul').length > 0)
        type = 'list';
    else if (box.find('table').length > 0)
        type = 'table';
    return {
        text: text.slice(0, 1000),
        source: firstText(sourceLink, 'h3, cite') || firstText(container, 'cite'),
        sourceUrl: sourceLink.attr('href') || '',
        type,
    };
}
/** 5. Related searches — unique texts between 3 and 99 characters, in page order. */
function extractRelatedSearches($) {
    const unique = new Set(); // a Set keeps first-insertion order
    $('.k8XOCe, .s75CSd, .EIaa9b, .brs_col a, [data-initq]').each((_, elem) => {
        const text = $(elem).text().trim();
        if (text.length > 2 && text.length < 100) {
            unique.add(text);
        }
    });
    return [...unique];
}
/** 6. Shopping results — deduplicated by title. */
function extractShoppingResults($) {
    const items = [];
    const seenTitles = new Set();
    $('.sh-dgr__content, .mnr-c .pla-unit, [data-docid]').each((_, elem) => {
        const el = $(elem);
        const title = firstText(el, '.tAxDx, .pymv4e, h3');
        if (!title || seenTitles.has(title))
            return;
        seenTitles.add(title);
        items.push({
            title,
            ...omitEmpty({
                price: firstText(el, '.a8Pemb, .e10twf, .HRLxBb'),
                source: firstText(el, '.aULzUe, .LbUacb, .dD8iuc'),
                url: el.find('a[href]').first().attr('href'),
                imageUrl: el.find('img').first().attr('src'),
                // Unparseable rating/review text yields no key at all.
                rating: parseFloat(firstText(el, '.Rsc7Yb, .NHJBb')) || undefined,
                reviewCount: parseCount(firstText(el, '.GpVvtc, .MRqCbe')),
            }),
        });
    });
    return items;
}
/** 7. News results — deduplicated by URL. */
function extractNewsResults($) {
    const items = [];
    const seenUrls = new Set();
    $('.WlydOe, .JJZKK, .SoaBEf, [jscontroller="d0DtYd"]').each((_, elem) => {
        const el = $(elem);
        const title = firstText(el, '[role="heading"], .mCBkyc, .nDgy9d');
        const url = el.find('a[href^="http"]').first().attr('href') || '';
        if (!title || !url || seenUrls.has(url))
            return;
        seenUrls.add(url);
        items.push({
            title,
            url,
            source: firstText(el, '.NUnG9d, .CEMjEf, .XTjFC'),
            ...omitEmpty({
                date: firstText(el, '.OSrXXb, .f'),
                snippet: firstText(el, '.GI74Re, .lEBKkf'),
                imageUrl: el.find('img').first().attr('src'),
            }),
        });
    });
    return items;
}
/** 8. Image pack — links that wrap an image, deduplicated by link URL. */
function extractImagePack($) {
    const items = [];
    const seenUrls = new Set();
    $('.isv-r a[href], .iKjWAf a[href]').each((_, elem) => {
        const link = $(elem);
        const img = link.find('img').first();
        const url = link.attr('href') || '';
        const imageUrl = img.attr('src') || img.attr('data-src') || '';
        if (!url || !imageUrl || seenUrls.has(url))
            return;
        seenUrls.add(url);
        items.push({
            url,
            imageUrl,
            ...omitEmpty({ title: img.attr('alt') || link.attr('aria-label') }),
        });
    });
    return items;
}
/** Name of the video platform whose domain occurs in `url`, or undefined. */
function detectVideoPlatform(url) {
    const platforms = [
        ['YouTube', ['youtube.com', 'youtu.be']],
        ['Vimeo', ['vimeo.com']],
        ['Dailymotion', ['dailymotion.com']],
        ['TikTok', ['tiktok.com']],
    ];
    const match = platforms.find(([, domains]) => domains.some((domain) => url.includes(domain)));
    return match ? match[0] : undefined;
}
/** 9. Video results — deduplicated by URL. */
function extractVideoResults($) {
    const items = [];
    const seenUrls = new Set();
    $('[data-surl], .dXiKIc, .RzdJxc, .ct3b9e').each((_, elem) => {
        const el = $(elem);
        const title = firstText(el, 'h3') ||
            firstText(el, '.fc9yUc') ||
            el.find('[aria-label]').first().attr('aria-label') ||
            '';
        const url = el.find('a[href^="http"]').first().attr('href') || el.attr('data-surl') || '';
        if (!title || !url || seenUrls.has(url))
            return;
        seenUrls.add(url);
        items.push({
            title,
            url,
            ...omitEmpty({
                platform: detectVideoPlatform(url),
                duration: firstText(el, '.J1mWY, .FGpTBd, .vjB1Cc'),
                date: firstText(el, '.LEwnzc, .f'),
                thumbnailUrl: el.find('img').first().attr('src'),
            }),
        });
    });
    return items;
}
/** 10. Local pack — business listings, deduplicated by name. */
function extractLocalPack($) {
    const items = [];
    const seenNames = new Set();
    $('.VkpGBb, [data-local-attribute], .rllt__details').each((_, elem) => {
        const el = $(elem);
        const name = firstText(el, '.OSrXXb, .dbg0pd') || firstText(el, '[role="heading"]');
        if (!name || seenNames.has(name))
            return;
        seenNames.add(name);
        items.push({
            name,
            ...omitEmpty({
                address: firstText(el, '.lMbq3e, .sXtWJb'),
                rating: parseFloat(firstText(el, '.MW4etd, .yi40Hd')) || undefined,
                reviewCount: parseCount(firstText(el, '.UY7F9, .RDApEe')),
                type: firstText(el, '.YhemCb, .Q2vNVc'),
                phone: firstText(el, '.fhNHSe, [data-dtype="d3ph"]'),
            }),
        });
    });
    return items;
}
/** 11. Total results / search time, read from the `#result-stats` line; keys are present only when matched. */
function extractResultStats($) {
    const stats = $('#result-stats').text().trim();
    if (!stats)
        return {};
    const totalMatch = stats.match(/About ([\d,]+) results/i);
    const timeMatch = stats.match(/\(([\d.]+) seconds?\)/i);
    return omitEmpty({
        totalResults: totalMatch ? totalMatch[1] : undefined,
        searchTime: timeMatch ? timeMatch[1] : undefined,
    });
}
|
|
@@ -0,0 +1,25 @@
|
|
|
1
|
+
/**
|
|
2
|
+
 * Additional search engine providers: Baidu, Naver, Yahoo Japan, Yandex
|
|
3
|
+
* HTTP-only scraping with cheerio — no browser, no API key required.
|
|
4
|
+
*/
|
|
5
|
+
import type { WebSearchResult, WebSearchOptions, SearchProvider, SearchProviderId } from './search-provider.js';
|
|
6
|
+
/** Baidu search provider; usable without an API key. */
export declare class BaiduSearchProvider implements SearchProvider {
    /** Identifier of this provider. */
    readonly id: SearchProviderId;
    /** Always `false`: no API key is needed. */
    readonly requiresApiKey: false;
    /**
     * Runs a web search for `query`.
     *
     * @param query - Search query.
     * @param options - Search options.
     * @returns A promise for the list of results.
     */
    searchWeb(query: string, options: WebSearchOptions): Promise<WebSearchResult[]>;
}
|
|
11
|
+
/** Naver search provider; usable without an API key. */
export declare class NaverSearchProvider implements SearchProvider {
    /** Identifier of this provider. */
    readonly id: SearchProviderId;
    /** Always `false`: no API key is needed. */
    readonly requiresApiKey: false;
    /**
     * Runs a web search for `query`.
     *
     * @param query - Search query.
     * @param options - Search options.
     * @returns A promise for the list of results.
     */
    searchWeb(query: string, options: WebSearchOptions): Promise<WebSearchResult[]>;
}
|
|
16
|
+
/** Yahoo Japan search provider; usable without an API key. */
export declare class YahooJapanSearchProvider implements SearchProvider {
    /** Identifier of this provider. */
    readonly id: SearchProviderId;
    /** Always `false`: no API key is needed. */
    readonly requiresApiKey: false;
    /**
     * Runs a web search for `query`.
     *
     * @param query - Search query.
     * @param options - Search options.
     * @returns A promise for the list of results.
     */
    searchWeb(query: string, options: WebSearchOptions): Promise<WebSearchResult[]>;
}
|
|
21
|
+
/** Yandex search provider; usable without an API key. */
export declare class YandexSearchProvider implements SearchProvider {
    /** Identifier of this provider. */
    readonly id: SearchProviderId;
    /** Always `false`: no API key is needed. */
    readonly requiresApiKey: false;
    /**
     * Runs a web search for `query`.
     *
     * @param query - Search query.
     * @param options - Search options.
     * @returns A promise for the list of results.
     */
    searchWeb(query: string, options: WebSearchOptions): Promise<WebSearchResult[]>;
}
|