webpeel 0.21.89 → 0.22.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (45) hide show
  1. package/dist/core/cross-verify.d.ts +27 -0
  2. package/dist/core/cross-verify.js +93 -0
  3. package/dist/core/google-serp-parser.d.ts +82 -0
  4. package/dist/core/google-serp-parser.js +287 -0
  5. package/dist/core/search-engines.d.ts +25 -0
  6. package/dist/core/search-engines.js +182 -0
  7. package/dist/core/search-provider.d.ts +5 -1
  8. package/dist/core/search-provider.js +15 -2
  9. package/dist/core/vertical-search.d.ts +53 -0
  10. package/dist/core/vertical-search.js +231 -0
  11. package/dist/index.d.ts +5 -0
  12. package/dist/index.js +4 -0
  13. package/dist/server/app.js +1 -1
  14. package/dist/server/routes/search.js +199 -3
  15. package/dist/server/routes/smart-search/handlers/cars.d.ts +2 -0
  16. package/dist/server/routes/smart-search/handlers/cars.js +99 -0
  17. package/dist/server/routes/smart-search/handlers/flights.d.ts +2 -0
  18. package/dist/server/routes/smart-search/handlers/flights.js +69 -0
  19. package/dist/server/routes/smart-search/handlers/general.d.ts +2 -0
  20. package/dist/server/routes/smart-search/handlers/general.js +390 -0
  21. package/dist/server/routes/smart-search/handlers/hotels.d.ts +2 -0
  22. package/dist/server/routes/smart-search/handlers/hotels.js +85 -0
  23. package/dist/server/routes/smart-search/handlers/products.d.ts +2 -0
  24. package/dist/server/routes/smart-search/handlers/products.js +213 -0
  25. package/dist/server/routes/smart-search/handlers/rental.d.ts +2 -0
  26. package/dist/server/routes/smart-search/handlers/rental.js +151 -0
  27. package/dist/server/routes/smart-search/handlers/restaurants.d.ts +2 -0
  28. package/dist/server/routes/smart-search/handlers/restaurants.js +205 -0
  29. package/dist/server/routes/smart-search/index.d.ts +19 -0
  30. package/dist/server/routes/smart-search/index.js +508 -0
  31. package/dist/server/routes/smart-search/intent.d.ts +3 -0
  32. package/dist/server/routes/smart-search/intent.js +109 -0
  33. package/dist/server/routes/smart-search/llm.d.ts +8 -0
  34. package/dist/server/routes/smart-search/llm.js +101 -0
  35. package/dist/server/routes/smart-search/sources/reddit.d.ts +18 -0
  36. package/dist/server/routes/smart-search/sources/reddit.js +34 -0
  37. package/dist/server/routes/smart-search/sources/yelp.d.ts +25 -0
  38. package/dist/server/routes/smart-search/sources/yelp.js +171 -0
  39. package/dist/server/routes/smart-search/sources/youtube.d.ts +8 -0
  40. package/dist/server/routes/smart-search/sources/youtube.js +9 -0
  41. package/dist/server/routes/smart-search/types.d.ts +30 -0
  42. package/dist/server/routes/smart-search/types.js +1 -0
  43. package/dist/server/routes/smart-search/utils.d.ts +12 -0
  44. package/dist/server/routes/smart-search/utils.js +97 -0
  45. package/package.json +1 -1
@@ -0,0 +1,182 @@
1
+ /**
2
+ * Additional search engine providers: Baidu, Naver, Yahoo Japan, Yandex
3
+ * Cheerio-based scraping with no API key required. Baidu, Yahoo Japan and Yandex use plain HTTP; Naver is fetched as a rendered page via peel().
4
+ */
5
+ import { load } from 'cheerio';
6
+ import { simpleFetch } from './fetcher.js';
7
+ // ── Baidu Search ──────────────────────────────────────────────────────────
8
/**
 * Search provider that scrapes Baidu's HTML results page with cheerio.
 * No API key is needed; any failure (network, parsing) yields an empty list.
 */
export class BaiduSearchProvider {
    id = 'baidu';
    requiresApiKey = false;
    /**
     * Run a Baidu web search.
     *
     * @param {string} query - Search terms, sent as the `wd` parameter.
     * @param {{ count?: number }} [options] - `count` is the maximum number of
     *   results to return (default 10; at most 50 are requested from Baidu).
     * @returns {Promise<Array<{ title: string, url: string, snippet: string }>>}
     *   De-duplicated results in page order; `[]` on an empty response or any error.
     */
    async searchWeb(query, options) {
        // A missing/null options bag must not throw before the try block is reached.
        const { count = 10 } = options ?? {};
        const params = new URLSearchParams({
            wd: query,
            rn: String(Math.min(count, 50)),
            ie: 'utf-8',
        });
        const url = `https://www.baidu.com/s?${params}`;
        try {
            const response = await simpleFetch(url, 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36', 15000);
            if (!response.html)
                return [];
            const $ = load(response.html);
            const results = [];
            const seen = new Set();
            // Result containers are marked with .result or .c-container
            $('.result, .c-container').each((_, elem) => {
                const el = $(elem);
                const linkEl = el.find('h3 a, .t a').first();
                const title = linkEl.text().trim();
                // Anchors carry Baidu redirect URLs, so prefer the container's
                // `mu` attribute (the real URL) and fall back to the anchor href.
                const href = el.attr('mu') || linkEl.attr('href') || '';
                const snippet = el.find('.c-abstract, .c-span-last, .content-right_8Zs40').first().text().trim();
                if (!title || !href || seen.has(href))
                    return;
                seen.add(href);
                results.push({ title, url: href, snippet });
            });
            return results.slice(0, count);
        }
        catch {
            // Best-effort provider: callers get an empty list instead of an exception.
            return [];
        }
    }
}
47
+ // ── Naver Search (Korea) ──────────────────────────────────────────────────
48
/**
 * Search provider for Naver (Korea). The results page is fetched through
 * peel() with rendering enabled and then parsed with cheerio.
 * No API key is needed; any failure yields an empty list.
 */
export class NaverSearchProvider {
    id = 'naver';
    requiresApiKey = false;
    /**
     * Run a Naver web search.
     *
     * @param {string} query - Search terms, sent as the `query` parameter.
     * @param {{ count?: number }} [options] - `count` is the maximum number of
     *   results to return (default 10).
     * @returns {Promise<Array<{ title: string, url: string, snippet: string }>>}
     *   De-duplicated results in page order; `[]` on empty content or any error.
     */
    async searchWeb(query, options) {
        // A missing/null options bag must not throw before the try block is reached.
        const { count = 10 } = options ?? {};
        const params = new URLSearchParams({
            query: query,
            where: 'web',
        });
        const url = `https://search.naver.com/search.naver?${params}`;
        try {
            // Naver is heavily JS-rendered — use peel with render for full content.
            // Imported lazily, inside the try, so an import failure also yields [].
            const { peel } = await import('../index.js');
            const result = await peel(url, { render: true, format: 'html', wait: 2000, timeout: 15000 });
            const html = result.content || '';
            if (!html)
                return [];
            const $ = load(html);
            const results = [];
            const seen = new Set();
            // Web results are marked with .type-web; titles live in *-title-text classes
            $('.type-web').each((_, elem) => {
                const el = $(elem);
                // Use the nearest item-like ancestor as the card, else the direct parent.
                const nearest = el.closest('li, section, [class*=item]');
                const card = nearest.length ? nearest : el.parent();
                const title = card.find('[class*="title-text"], [class*="Title"]').first().text().trim();
                const href = card.find('a[href^="http"]').first().attr('href') || '';
                const snippet = card.find('[class*="text-type-body"], [class*="desc"]').first().text().trim();
                if (!title || !href || href.includes('naver.com/search') || seen.has(href))
                    return;
                seen.add(href);
                results.push({ title, url: href, snippet });
            });
            // Fallback: generic link extraction when .type-web yielded nothing
            if (results.length === 0) {
                $('a[href^="http"]').each((_, elem) => {
                    const el = $(elem);
                    const href = el.attr('href') || '';
                    const title = el.text().trim();
                    // Keep plausible titles only and drop every Naver-hosted link.
                    const plausibleTitle = title.length > 5 && title.length < 200;
                    if (!plausibleTitle || !href || href.includes('naver.com') || seen.has(href))
                        return;
                    seen.add(href);
                    results.push({ title, url: href, snippet: '' });
                });
            }
            return results.slice(0, count);
        }
        catch {
            // Best-effort provider: callers get an empty list instead of an exception.
            return [];
        }
    }
}
99
+ // ── Yahoo Japan Search ────────────────────────────────────────────────────
100
/**
 * Search provider that scrapes Yahoo! JAPAN's HTML results page with cheerio.
 * No API key is needed; any failure yields an empty list.
 */
export class YahooJapanSearchProvider {
    id = 'yahoo_japan';
    requiresApiKey = false;
    /**
     * Run a Yahoo! JAPAN web search.
     *
     * @param {string} query - Search terms, sent as the `p` parameter.
     * @param {{ count?: number }} [options] - `count` is the maximum number of
     *   results to return (default 10; at most 50 are requested).
     * @returns {Promise<Array<{ title: string, url: string, snippet: string }>>}
     *   De-duplicated results in page order; `[]` on an empty response or any error.
     */
    async searchWeb(query, options) {
        // A missing/null options bag must not throw before the try block is reached.
        const { count = 10 } = options ?? {};
        const params = new URLSearchParams({
            p: query,
            n: String(Math.min(count, 50)),
            ei: 'UTF-8',
        });
        const url = `https://search.yahoo.co.jp/search?${params}`;
        try {
            const response = await simpleFetch(url, 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36', 15000);
            if (!response.html)
                return [];
            const $ = load(response.html);
            const results = [];
            const seen = new Set();
            // Result selectors (the 2026 layout uses sw-Card components)
            $('.sw-Card__title, .algo, .dd').each((_, elem) => {
                const el = $(elem);
                // Walk up to the card container to find the link and snippet;
                // fall back to the direct parent when no container matches.
                const nearest = el.closest('[class*="sw-Card"], .algo, .dd, li');
                const card = nearest.length ? nearest : el.parent();
                const title = el.find('.sw-Card__titleMain, h3, a').first().text().trim() || el.text().trim();
                const href = card.find('a[href^="http"]').first().attr('href') || '';
                const snippet = card.find('.sw-Card__description, .sw-Card__floatText, p').first().text().trim();
                // Titles of three characters or fewer are dropped.
                if (!title || title.length <= 3 || !href)
                    return;
                // Filter Yahoo internal links (search pages and cache hosts)
                if (href.includes('yahoo.co.jp/search') || href.includes('cache.yahoofs'))
                    return;
                if (seen.has(href))
                    return;
                seen.add(href);
                results.push({ title, url: href, snippet });
            });
            return results.slice(0, count);
        }
        catch {
            // Best-effort provider: callers get an empty list instead of an exception.
            return [];
        }
    }
}
141
+ // ── Yandex Search ──────────────────────────────────────────────────────────
142
/**
 * Search provider that scrapes Yandex's HTML results page with cheerio.
 * No API key is needed; any failure yields an empty list.
 */
export class YandexSearchProvider {
    id = 'yandex';
    requiresApiKey = false;
    /**
     * Run a Yandex web search.
     *
     * @param {string} query - Search terms, sent as the `text` parameter.
     * @param {{ count?: number }} [options] - `count` is the maximum number of
     *   results to return (default 10; at most 50 are requested).
     * @returns {Promise<Array<{ title: string, url: string, snippet: string }>>}
     *   Results in page order, de-duplicated by normalized URL; `[]` on an empty
     *   response or any error.
     */
    async searchWeb(query, options) {
        // A missing/null options bag must not throw before the try block is reached.
        const { count = 10 } = options ?? {};
        const params = new URLSearchParams({
            text: query,
            numdoc: String(Math.min(count, 50)),
            // Region id (US, per the original author's note). It is fixed here:
            // no option read by this method overrides it.
            lr: '84',
        });
        // Use Yandex HTML search
        const url = `https://yandex.com/search/?${params}`;
        try {
            const response = await simpleFetch(url, 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36', 15000);
            if (!response.html)
                return [];
            const $ = load(response.html);
            const results = [];
            const seen = new Set();
            $('.serp-item, .organic').each((_, elem) => {
                const el = $(elem);
                const href = el.find('.organic__url, .link, a[href]').first().attr('href') || '';
                const title = el.find('.organic__title, .OrganicTitle-LinkText, h2').first().text().trim();
                const snippet = el.find('.organic__text, .OrganicText, .text-container').first().text().trim();
                // Filter internal Yandex links
                if (!title || !href || href.includes('yandex.'))
                    return;
                let fullUrl;
                if (/^https?:\/\//i.test(href)) {
                    fullUrl = href;
                }
                else if (href.startsWith('//')) {
                    // Protocol-relative link: only the scheme is missing.
                    fullUrl = `https:${href}`;
                }
                else if (href.startsWith('/')) {
                    // Site-relative path: it points back at Yandex itself, which is
                    // filtered out above. Prefixing "https://" would yield "https:///path".
                    return;
                }
                else {
                    // Bare "host/path" without a scheme.
                    fullUrl = `https://${href}`;
                }
                // De-duplicate on the normalized form so "//host/x" and
                // "https://host/x" are not both reported.
                if (seen.has(fullUrl))
                    return;
                seen.add(fullUrl);
                results.push({ title, url: fullUrl, snippet });
            });
            return results.slice(0, count);
        }
        catch {
            // Best-effort provider: callers get an empty list instead of an exception.
            return [];
        }
    }
}
@@ -12,7 +12,7 @@
12
12
  * In production with no API keys configured, getBestSearchProvider() returns
13
13
  * StealthSearchProvider since DDG HTTP is often blocked on datacenter IPs.
14
14
  */
15
- export type SearchProviderId = 'duckduckgo' | 'brave' | 'stealth' | 'google';
15
+ export type SearchProviderId = 'duckduckgo' | 'brave' | 'stealth' | 'google' | 'baidu' | 'yandex' | 'naver' | 'yahoo_japan';
16
16
  export interface WebSearchResult {
17
17
  title: string;
18
18
  url: string;
@@ -21,6 +21,8 @@ export interface WebSearchResult {
21
21
  relevanceScore?: number;
22
22
  /** Thumbnail/image URL from SearXNG results (img_src or thumbnail field). */
23
23
  imageUrl?: string;
24
+ /** Structured SERP data when structured=true is passed in WebSearchOptions. */
25
+ serp?: import('./google-serp-parser.js').GoogleSerpResult;
24
26
  }
25
27
  export interface WebSearchOptions {
26
28
  /** Number of results (1-10) */
@@ -42,6 +44,8 @@ export interface WebSearchOptions {
42
44
  locale?: string;
43
45
  /** Optional AbortSignal */
44
46
  signal?: AbortSignal;
47
+ /** Return structured SERP data (knowledge panel, PAA, featured snippets, etc.) */
48
+ structured?: boolean;
45
49
  }
46
50
  export interface SearchProvider {
47
51
  readonly id: SearchProviderId;
@@ -1400,8 +1400,18 @@ export class GoogleSearchProvider {
1400
1400
  const snippet = cleanText(snippetText, { maxLen: 500, stripEllipsisPadding: true });
1401
1401
  results.push({ title, url: validated, snippet });
1402
1402
  });
1403
- if (results.length > 0)
1404
- return results.slice(0, count);
1403
+ if (results.length > 0) {
1404
+ const sliced = results.slice(0, count);
1405
+ // Attach structured SERP data to the first result when structured=true
1406
+ if (options?.structured) {
1407
+ const { parseGoogleSerp } = await import('./google-serp-parser.js');
1408
+ const serp = parseGoogleSerp(html);
1409
+ if (sliced.length > 0) {
1410
+ sliced[0] = { ...sliced[0], serp };
1411
+ }
1412
+ }
1413
+ return sliced;
1414
+ }
1405
1415
  }
1406
1416
  }
1407
1417
  catch (e) {
@@ -1536,6 +1546,9 @@ export function getSearchProvider(id) {
1536
1546
  return new StealthSearchProvider();
1537
1547
  if (id === 'google')
1538
1548
  return new GoogleSearchProvider();
1549
+ // 'baidu', 'yandex', 'naver' and 'yahoo_japan' are implemented by the provider classes
1550
+ // in './search-engines.js'. They cannot be imported here (circular dependency), so these
1551
+ // ids reach the DuckDuckGo fallback below. Instantiate the classes from search-engines.js directly.
1539
1552
  // Exhaustive fallback (should be unreachable due to typing)
1540
1553
  return new DuckDuckGoProvider();
1541
1554
  }
@@ -0,0 +1,53 @@
1
+ /**
2
+ * Vertical search — specialized endpoints for shopping, news, images, videos.
3
+ * Uses Google vertical search pages + cheerio parsing.
4
+ */
5
+ export interface ShoppingResult { // One product listing as returned by searchShopping().
6
+ title: string; // Product title text.
7
+ price?: string; // Price text as scraped from the page (not parsed); omitted when none was found.
8
+ currency?: string; // NOTE(review): not set by the searchShopping implementation in vertical-search.js.
9
+ store: string; // Seller name; searchShopping falls back to 'Unknown' when none was found.
10
+ url: string; // Listing link; searchShopping prefixes "/"-relative links with https://www.google.com.
11
+ imageUrl?: string; // Set by searchShopping only when the image src starts with "http".
12
+ rating?: number; // parseFloat of the rating text; omitted when that is NaN or 0.
13
+ reviewCount?: number; // Digits of the review text parsed as an integer; omitted when that is NaN or 0.
14
+ condition?: string; // NOTE(review): not set by the searchShopping implementation in vertical-search.js.
15
+ }
16
+ export interface NewsResult { // One article as returned by searchNews().
17
+ title: string; // Headline text.
18
+ url: string; // First absolute (http...) link found inside the result element.
19
+ source: string; // Publisher name; searchNews falls back to 'Unknown' when none was found.
20
+ date?: string; // Date text exactly as shown on the page (not parsed).
21
+ snippet?: string; // Summary text; omitted when none was found.
22
+ imageUrl?: string; // First image whose src starts with "http"; omitted otherwise.
23
+ category?: string; // NOTE(review): not set by the searchNews implementation in vertical-search.js.
24
+ }
25
+ export interface ImageResult { // One image as returned by searchImages().
26
+ title: string; // Bing metadata field `t`, else the <img> alt text, else ''.
27
+ url: string; // Bing metadata field `purl` (presumably the hosting page — confirm), or the first link href in the fallback path.
28
+ imageUrl: string; // Bing metadata field `murl`, else `turl`; in the fallback path the <img> src or data-src.
29
+ width?: number; // Bing metadata field `w`; only set when metadata was available.
30
+ height?: number; // Bing metadata field `h`; only set when metadata was available.
31
+ source?: string; // Bing metadata field `desc`; only set when metadata was available.
32
+ }
33
+ export interface VideoResult { // One video as returned by searchVideos().
34
+ title: string; // Title text of the result.
35
+ url: string; // First absolute (http...) link found inside the result element.
36
+ platform: string; // Derived from the URL by searchVideos: 'YouTube', 'Vimeo', or 'Web' for anything else.
37
+ duration?: string; // Duration text exactly as shown on the page (not parsed).
38
+ date?: string; // Date text exactly as shown on the page (not parsed).
39
+ thumbnailUrl?: string; // First image whose src starts with "http"; omitted otherwise.
40
+ channel?: string; // Channel/uploader text; omitted when none was found.
41
+ views?: string; // NOTE(review): not set by the searchVideos implementation in vertical-search.js.
42
+ }
43
+ export interface VerticalSearchOptions { // Options bag shared by searchShopping, searchNews, searchImages and searchVideos.
44
+ query: string; // Search terms.
45
+ count?: number; // Maximum number of results returned. Defaults in vertical-search.js: 20 for searchImages, 10 for the others.
46
+ country?: string; // searchShopping lowercases this and sends it as Google's `gl` parameter.
47
+ language?: string; // searchShopping and searchNews send this as Google's `hl` parameter.
48
+ freshness?: string; // searchNews maps 'day' | 'week' | 'month' to Google's `tbs=qdr:d|w|m`; other values add no filter.
49
+ }
50
+ export declare function searchShopping(opts: VerticalSearchOptions): Promise<ShoppingResult[]>; // Google Shopping (tbm=shop) page rendered via peel(); resolves to [] when the fetch or parse throws or the page is under 500 characters.
51
+ export declare function searchNews(opts: VerticalSearchOptions): Promise<NewsResult[]>; // Google News (tbm=nws) page rendered via peel(); resolves to [] when the fetch or parse throws or the page is under 500 characters.
52
+ export declare function searchImages(opts: VerticalSearchOptions): Promise<ImageResult[]>; // Bing Images page rendered via peel(); resolves to [] when the fetch or parse throws or the page is under 500 characters.
53
+ export declare function searchVideos(opts: VerticalSearchOptions): Promise<VideoResult[]>; // Google Videos (tbm=vid) page rendered via peel(); resolves to [] when the fetch or parse throws or the page is under 500 characters.
@@ -0,0 +1,231 @@
1
+ /**
2
+ * Vertical search — specialized endpoints for shopping, news, images, videos.
3
+ * Uses Google vertical search pages + cheerio parsing.
4
+ */
5
+ import { load } from 'cheerio';
6
+ // ── Shopping Search ────────────────────────────────────────────────────────
7
/**
 * Search Google Shopping by rendering the `tbm=shop` results page through
 * peel() and parsing product cards with cheerio.
 *
 * @param {{ query: string, count?: number, country?: string, language?: string }} opts
 *   `count` is the maximum number of results (default 10). `country` is lowercased
 *   and sent as `gl`; `language` is sent as `hl`.
 * @returns {Promise<Array<object>>} Up to `count` product listings in page order,
 *   de-duplicated. Resolves to `[]` when the page fetch or parsing throws or the
 *   page is shorter than 500 characters. A failure to import `peel` itself is
 *   not caught and rejects the promise.
 */
export async function searchShopping(opts) {
    const { query, count = 10, country, language } = opts;
    // Strategy: Use Google Shopping via peel() with render
    const { peel } = await import('../index.js');
    const params = new URLSearchParams({
        q: query,
        tbm: 'shop', // Google Shopping mode
        // Ask for twice as many rows as needed, since many cards are discarded below.
        num: String(Math.min(count * 2, 40)),
    });
    if (country)
        params.set('gl', country.toLowerCase());
    if (language)
        params.set('hl', language);
    const url = `https://www.google.com/search?${params}`;
    try {
        const result = await peel(url, {
            render: true,
            stealth: true,
            format: 'html',
            wait: 3000,
            timeout: 15000,
        });
        const html = result.content || '';
        // Pages this short are treated as having no results.
        if (!html || html.length < 500)
            return [];
        const $ = load(html);
        const items = [];
        // The selector union can match both a card and one of its ancestors
        // (e.g. a [data-docid] wrapper), so remember what was already emitted.
        const seen = new Set();
        $('.sh-dgr__content, .sh-dlr__list-result, .mnr-c .pla-unit, [data-docid], .KZmu8e').each((_, elem) => {
            const el = $(elem);
            const title = el.find('.tAxDx, .pymv4e, h3, .Xjkr3b').first().text().trim();
            const price = el.find('.a8Pemb, .e10twf, .HRLxBb, .kHxwFf').first().text().trim();
            const store = el.find('.aULzUe, .LbUacb, .dD8iuc, .IuHnof').first().text().trim();
            const link = el.find('a[href]').first().attr('href') || '';
            const img = el.find('img').first().attr('src') || '';
            const ratingText = el.find('.Rsc7Yb, .yi40Hd').first().text().trim();
            const reviewText = el.find('.QhqGkb, .RDApEe').first().text().trim();
            // A card needs a title plus at least a price or a store to be useful.
            if (!title || !(price || store))
                return;
            // Site-relative links are resolved against google.com; anything else is kept as-is.
            let productUrl = link;
            if (!link.startsWith('http') && link.startsWith('/')) {
                productUrl = `https://www.google.com${link}`;
            }
            const key = `${title}\u0000${productUrl}`;
            if (seen.has(key))
                return;
            seen.add(key);
            items.push({
                title,
                price: price || undefined,
                store: store || 'Unknown',
                url: productUrl,
                imageUrl: img.startsWith('http') ? img : undefined,
                // NaN (and 0) collapse to undefined.
                rating: Number.parseFloat(ratingText) || undefined,
                reviewCount: Number.parseInt(reviewText.replace(/[^0-9]/g, ''), 10) || undefined,
            });
        });
        return items.slice(0, count);
    }
    catch {
        // Best-effort: fetch/parse problems produce an empty result set.
        return [];
    }
}
66
+ // ── News Search ────────────────────────────────────────────────────────────
67
/**
 * Search Google News by rendering the `tbm=nws` results page through peel()
 * and parsing article cards with cheerio.
 *
 * @param {{ query: string, count?: number, country?: string, language?: string, freshness?: string }} opts
 *   `count` is the maximum number of results (default 10). `country` is lowercased
 *   and sent as `gl` (as searchShopping does); `language` is sent as `hl`.
 *   `freshness` of 'day' | 'week' | 'month' restricts results via `tbs=qdr:*`;
 *   any other value adds no filter.
 * @returns {Promise<Array<object>>} Up to `count` articles in page order,
 *   de-duplicated by URL. Resolves to `[]` when the page fetch or parsing throws
 *   or the page is shorter than 500 characters. A failure to import `peel`
 *   itself is not caught and rejects the promise.
 */
export async function searchNews(opts) {
    const { query, count = 10, country, language, freshness } = opts;
    const { peel } = await import('../index.js');
    const params = new URLSearchParams({
        q: query,
        tbm: 'nws', // Google News mode
        // Ask for twice as many rows as needed, since some cards are discarded below.
        num: String(Math.min(count * 2, 40)),
    });
    // `country` is part of VerticalSearchOptions and was silently ignored here.
    if (country)
        params.set('gl', country.toLowerCase());
    if (language)
        params.set('hl', language);
    if (freshness === 'day')
        params.set('tbs', 'qdr:d');
    else if (freshness === 'week')
        params.set('tbs', 'qdr:w');
    else if (freshness === 'month')
        params.set('tbs', 'qdr:m');
    const url = `https://www.google.com/search?${params}`;
    try {
        const result = await peel(url, {
            render: true,
            stealth: true,
            format: 'html',
            wait: 3000,
            timeout: 15000,
        });
        const html = result.content || '';
        // Pages this short are treated as having no results.
        if (!html || html.length < 500)
            return [];
        const $ = load(html);
        const items = [];
        // The selector union can match nested containers of the same article,
        // so each article URL is emitted only once.
        const seen = new Set();
        $('.WlydOe, .JJZKK, .SoaBEf, .dbsr, [jscontroller="d0DtYd"]').each((_, elem) => {
            const el = $(elem);
            const title = el.find('[role="heading"], .mCBkyc, .nDgy9d, .JheGif').first().text().trim();
            const link = el.find('a[href^="http"]').first().attr('href') || '';
            const source = el.find('.NUnG9d, .CEMjEf, .XTjFC, .wEwyrc').first().text().trim();
            const date = el.find('.OSrXXb, .WG9SHc, .f').first().text().trim();
            const snippet = el.find('.GI74Re, .Y3v8qd, .VwiC3b').first().text().trim();
            const img = el.find('img[src^="http"]').first().attr('src') || '';
            if (!title || !link || seen.has(link))
                return;
            seen.add(link);
            items.push({
                title,
                url: link,
                source: source || 'Unknown',
                date: date || undefined,
                snippet: snippet || undefined,
                imageUrl: img || undefined,
            });
        });
        return items.slice(0, count);
    }
    catch {
        // Best-effort: fetch/parse problems produce an empty result set.
        return [];
    }
}
123
+ // ── Image Search ────────────────────────────────────────────────────────────
124
/**
 * Search images by rendering a Bing Images results page through peel() and
 * parsing the result tiles with cheerio.
 *
 * @param {{ query: string, count?: number }} opts - `count` is the maximum
 *   number of results (default 20).
 * @returns {Promise<Array<object>>} Up to `count` images in page order,
 *   de-duplicated by image URL. Resolves to `[]` when the page fetch or parsing
 *   throws or the page is shorter than 500 characters. A failure to import
 *   `peel` itself is not caught and rejects the promise.
 */
export async function searchImages(opts) {
    const { query, count = 20 } = opts;
    // Use Bing Images (more scrape-friendly than Google Images)
    const { peel } = await import('../index.js');
    const params = new URLSearchParams({ q: query, form: 'HDRSC2', first: '1' });
    const url = `https://www.bing.com/images/search?${params}`;
    try {
        // The content is parsed as HTML below, so request HTML explicitly, as every
        // other cheerio-parsed peel() call in this package does.
        const result = await peel(url, { render: true, format: 'html', wait: 2000, timeout: 15000 });
        const html = result.content || '';
        // Pages this short are treated as having no results.
        if (!html || html.length < 500)
            return [];
        const $ = load(html);
        const items = [];
        const seen = new Set();
        // imageUrl is a required field, so entries without one are dropped.
        const addItem = (item) => {
            if (!item.imageUrl || seen.has(item.imageUrl))
                return;
            seen.add(item.imageUrl);
            items.push(item);
        };
        $('.iusc, .imgpt, [data-idx]').each((_, elem) => {
            const el = $(elem);
            // Bing stores image metadata as JSON in the `m` attribute
            const mData = el.attr('m');
            if (mData) {
                try {
                    const m = JSON.parse(mData);
                    addItem({
                        title: m.t || el.find('img').attr('alt') || '',
                        url: m.purl || '',
                        imageUrl: m.murl || m.turl || '',
                        width: m.w || undefined,
                        height: m.h || undefined,
                        source: m.desc || undefined,
                    });
                }
                catch {
                    /* skip malformed JSON */
                }
                return;
            }
            // A wrapper whose descendant carries the `m` metadata is handled when that
            // descendant is visited (when it matches the selector) or assumed to be;
            // taking the wrapper's thumbnail as well would report the same image twice.
            if (el.find('[m]').length > 0)
                return;
            // Fallback: direct img extraction
            const img = el.find('img');
            const imgSrc = img.attr('src') || img.attr('data-src') || '';
            if (imgSrc.startsWith('http')) {
                addItem({
                    title: img.attr('alt') || '',
                    url: el.find('a[href]').first().attr('href') || '',
                    imageUrl: imgSrc,
                });
            }
        });
        return items.slice(0, count);
    }
    catch {
        // Best-effort: fetch/parse problems produce an empty result set.
        return [];
    }
}
178
+ // ── Video Search ────────────────────────────────────────────────────────────
179
/**
 * Search videos by rendering Google's `tbm=vid` results page through peel()
 * and parsing result cards with cheerio.
 *
 * @param {{ query: string, count?: number, country?: string, language?: string }} opts
 *   `count` is the maximum number of results (default 10). `country` is lowercased
 *   and sent as `gl`, `language` is sent as `hl` (as searchShopping does).
 * @returns {Promise<Array<object>>} Up to `count` videos in page order,
 *   de-duplicated by URL. Resolves to `[]` when the page fetch or parsing throws
 *   or the page is shorter than 500 characters. A failure to import `peel`
 *   itself is not caught and rejects the promise.
 */
export async function searchVideos(opts) {
    const { query, count = 10, country, language } = opts;
    const { peel } = await import('../index.js');
    const params = new URLSearchParams({
        q: query,
        tbm: 'vid', // Google Videos mode
        // Ask for twice as many rows as needed, since some cards are discarded below.
        num: String(Math.min(count * 2, 20)),
    });
    // Both are part of VerticalSearchOptions and were silently ignored here.
    if (country)
        params.set('gl', country.toLowerCase());
    if (language)
        params.set('hl', language);
    const url = `https://www.google.com/search?${params}`;
    try {
        const result = await peel(url, {
            render: true,
            stealth: true,
            format: 'html',
            wait: 3000,
            timeout: 15000,
        });
        const html = result.content || '';
        // Pages this short are treated as having no results.
        if (!html || html.length < 500)
            return [];
        const $ = load(html);
        const items = [];
        // The selector union can match nested containers of the same video
        // (e.g. a .g block and a [data-surl] element), so emit each URL once.
        const seen = new Set();
        $('[data-surl], .dXiKIc, .g, .RzdJxc').each((_, elem) => {
            const el = $(elem);
            const title = el.find('h3, .fc9yUc, [aria-label]').first().text().trim();
            const link = el.find('a[href^="http"]').first().attr('href') || '';
            const duration = el.find('.J1mWY, .FGpTBd, .vdur').first().text().trim();
            const date = el.find('.OSrXXb, .f').first().text().trim();
            const channel = el.find('.pcJO7e, .GlPvmc').first().text().trim();
            const thumb = el.find('img[src^="http"]').first().attr('src') || '';
            // Links back into Google's own search pages are not videos.
            if (!title || !link || link.includes('google.com/search') || seen.has(link))
                return;
            seen.add(link);
            let platform = 'Web';
            if (link.includes('youtube') || link.includes('youtu.be')) {
                // youtu.be short links are YouTube too.
                platform = 'YouTube';
            }
            else if (link.includes('vimeo')) {
                platform = 'Vimeo';
            }
            items.push({
                title,
                url: link,
                platform,
                duration: duration || undefined,
                date: date || undefined,
                channel: channel || undefined,
                thumbnailUrl: thumb || undefined,
            });
        });
        return items.slice(0, count);
    }
    catch {
        // Best-effort: fetch/parse problems produce an empty result set.
        return [];
    }
}
package/dist/index.d.ts CHANGED
@@ -18,7 +18,10 @@ export { extractInlineJson, type InlineExtractOptions, type InlineExtractResult
18
18
  export { runAgent, type AgentOptions, type AgentResult, type AgentProgress, type AgentStreamEvent, type AgentDepth, type AgentTopic } from './core/agent.js';
19
19
  export { summarizeContent, type SummarizeOptions } from './core/summarize.js';
20
20
  export { getSearchProvider, DuckDuckGoProvider, BraveSearchProvider, providerStats, type SearchProvider, type SearchProviderId, type WebSearchResult, type WebSearchOptions, } from './core/search-provider.js';
21
+ export { BaiduSearchProvider, YandexSearchProvider, NaverSearchProvider, YahooJapanSearchProvider } from './core/search-engines.js';
22
+ export { crossVerifySearch, type CrossVerifyResult } from './core/cross-verify.js';
21
23
  export { answerQuestion, type AnswerRequest, type AnswerResponse, type AnswerCitation, type LLMProviderId, type TokensUsed, } from './core/answer.js';
24
+ export { parseGoogleSerp, type GoogleSerpResult } from './core/google-serp-parser.js';
22
25
  export { searchJobs, type JobCard, type JobDetail, type JobSearchOptions, type JobSearchResult } from './core/jobs.js';
23
26
  export { RateGovernor, formatDuration, type RateConfig, type RateState, type CanApplyResult, } from './core/rate-governor.js';
24
27
  export { ApplicationTracker, type ApplicationRecord, type ApplicationFilter, type ApplicationStats, type ApplicationStatus, } from './core/application-tracker.js';
@@ -52,6 +55,8 @@ export { localSearch, type LocalSearchOptions, type LocalSearchResult, type Loca
52
55
  export { getBusinessIntel, type BusinessIntel } from './core/business-intel.js';
53
56
  export { CircuitBreaker, browserCircuitBreaker, type CircuitState } from './core/circuit-breaker.js';
54
57
  export { checkMemoryPressure } from './core/browser-pool.js';
58
+ export { searchShopping, searchNews, searchImages, searchVideos } from './core/vertical-search.js';
59
+ export type { ShoppingResult, NewsResult, ImageResult, VideoResult, VerticalSearchOptions } from './core/vertical-search.js';
55
60
  /**
56
61
  * Fetch and extract content from a URL
57
62
  *
package/dist/index.js CHANGED
@@ -21,7 +21,10 @@ export { extractInlineJson } from './core/extract-inline.js';
21
21
  export { runAgent } from './core/agent.js';
22
22
  export { summarizeContent } from './core/summarize.js';
23
23
  export { getSearchProvider, DuckDuckGoProvider, BraveSearchProvider, providerStats, } from './core/search-provider.js';
24
+ export { BaiduSearchProvider, YandexSearchProvider, NaverSearchProvider, YahooJapanSearchProvider } from './core/search-engines.js';
25
+ export { crossVerifySearch } from './core/cross-verify.js';
24
26
  export { answerQuestion, } from './core/answer.js';
27
+ export { parseGoogleSerp } from './core/google-serp-parser.js';
25
28
  export { searchJobs } from './core/jobs.js';
26
29
  export { RateGovernor, formatDuration, } from './core/rate-governor.js';
27
30
  export { ApplicationTracker, } from './core/application-tracker.js';
@@ -60,6 +63,7 @@ export { localSearch } from './core/local-search.js';
60
63
  export { getBusinessIntel } from './core/business-intel.js';
61
64
  export { CircuitBreaker, browserCircuitBreaker } from './core/circuit-breaker.js';
62
65
  export { checkMemoryPressure } from './core/browser-pool.js';
66
+ export { searchShopping, searchNews, searchImages, searchVideos } from './core/vertical-search.js';
63
67
  /**
64
68
  * Fetch and extract content from a URL
65
69
  *
@@ -19,7 +19,7 @@ import { createRateLimitMiddleware, RateLimiter } from './middleware/rate-limit.
19
19
  import { createHealthRouter } from './routes/health.js';
20
20
  import { createFetchRouter } from './routes/fetch.js';
21
21
  import { createSearchRouter } from './routes/search.js';
22
- import { createSmartSearchRouter } from './routes/smart-search.js';
22
+ import { createSmartSearchRouter } from './routes/smart-search/index.js';
23
23
  import { createUserRouter } from './routes/users.js';
24
24
  import { createStripeRouter, createBillingPortalRouter } from './routes/stripe.js';
25
25
  import { createOAuthRouter } from './routes/oauth.js';