@isdk/web-searcher 0.1.3 → 0.1.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (36)
  1. package/README.cn.md +168 -8
  2. package/README.md +168 -8
  3. package/dist/index.d.mts +221 -12
  4. package/dist/index.d.ts +221 -12
  5. package/dist/index.js +1 -1
  6. package/dist/index.mjs +1 -1
  7. package/docs/README.md +168 -8
  8. package/docs/classes/GoogleSearcher.md +171 -44
  9. package/docs/classes/WebSearcher.md +158 -45
  10. package/docs/functions/extractDate.md +42 -0
  11. package/docs/functions/extractMetadataFrom.md +40 -0
  12. package/docs/functions/fetchHeaders.md +34 -0
  13. package/docs/functions/fetchPartial.md +41 -0
  14. package/docs/functions/normalizeDate.md +29 -0
  15. package/docs/functions/parseHeaders.md +28 -0
  16. package/docs/functions/parseHtml.md +31 -0
  17. package/docs/functions/testUrlsByLatency.md +38 -0
  18. package/docs/globals.md +18 -0
  19. package/docs/interfaces/CustomTimeRange.md +3 -3
  20. package/docs/interfaces/ExtractOptions.md +54 -0
  21. package/docs/interfaces/FetchExtractorOptions.md +35 -0
  22. package/docs/interfaces/FetcherOptions.md +424 -0
  23. package/docs/interfaces/HtmlData.md +53 -0
  24. package/docs/interfaces/MetadataResult.md +27 -0
  25. package/docs/interfaces/PaginationConfig.md +9 -9
  26. package/docs/interfaces/SearchContext.md +30 -4
  27. package/docs/interfaces/SearchOptions.md +77 -11
  28. package/docs/interfaces/StandardSearchResult.md +10 -10
  29. package/docs/interfaces/VerifiedUrl.md +25 -0
  30. package/docs/type-aliases/MetadataType.md +13 -0
  31. package/docs/type-aliases/SafeSearchLevel.md +1 -1
  32. package/docs/type-aliases/SearchCategory.md +2 -2
  33. package/docs/type-aliases/SearchTimeRange.md +1 -1
  34. package/docs/type-aliases/SearchTimeRangePreset.md +2 -2
  35. package/docs/type-aliases/SearcherConstructor.md +2 -2
  36. package/package.json +3 -2
package/dist/index.d.mts CHANGED
@@ -1,5 +1,6 @@
1
1
  import * as _isdk_web_fetcher from '@isdk/web-fetcher';
2
2
  import { FetcherOptions, FetchSession } from '@isdk/web-fetcher';
3
+ export { FetcherOptions } from '@isdk/web-fetcher';
3
4
  import { IBaseFactoryOptions } from 'custom-factory';
4
5
 
5
6
  /**
@@ -83,8 +84,14 @@ interface SearchContext {
83
84
  page: number;
84
85
  /** The requested limit of results. */
85
86
  limit?: number;
87
+ /** Allows for custom variables passed via search options. */
88
+ [key: string]: any;
89
+ /** The baseUrl used for this specific fetch (if multi-instance is enabled) */
90
+ baseUrl?: string;
91
+ /** The name of the engine executing the search */
92
+ engine?: string;
86
93
  }
87
- type SearchTimeRangePreset = 'all' | 'day' | 'week' | 'month' | 'year';
94
+ type SearchTimeRangePreset = 'all' | 'hour' | 'day' | 'week' | 'month' | 'year';
88
95
  interface CustomTimeRange {
89
96
  /** Start date (Date object or string like 'YYYY-MM-DD'). */
90
97
  from: Date | string;
@@ -92,7 +99,7 @@ interface CustomTimeRange {
92
99
  to?: Date | string;
93
100
  }
94
101
  type SearchTimeRange = SearchTimeRangePreset | CustomTimeRange;
95
- type SearchCategory = 'all' | 'images' | 'videos' | 'news';
102
+ type SearchCategory = 'all' | 'images' | 'videos' | 'news' | string;
96
103
  type SafeSearchLevel = 'off' | 'moderate' | 'strict';
97
104
  /**
98
105
  * Options provided when executing a search.
@@ -139,12 +146,187 @@ interface SearchOptions {
139
146
  transform?: (results: StandardSearchResult[], context: SearchContext) => Promise<StandardSearchResult[]> | StandardSearchResult[];
140
147
  /** Any other custom variables to be injected into the template. */
141
148
  [key: string]: any;
149
+ /**
150
+ * Allows the user to dynamically specify or override the base URLs for the engines.
151
+ * Can be an array of URLs for a single engine, or a map of engine names to URL arrays.
152
+ */
153
+ baseUrls?: string[] | Record<string, string[]>;
154
+ /**
155
+ * User-defined callback to validate the fetched results for a page.
156
+ * If it returns false, the fetch is considered a failure, triggering the retry/failover mechanism.
157
+ */
158
+ validator?: (results: StandardSearchResult[], context: SearchContext) => boolean | Promise<boolean>;
159
+ /**
160
+ * If true (default), the searcher will attempt to fulfill the requested `limit`
161
+ * by falling back to subsequent engines in the chain if previous ones are exhausted.
162
+ * If false, it will stop after the first successful engine regardless of whether
163
+ * the limit was reached.
164
+ */
165
+ fillLimit?: boolean;
166
+ /**
167
+ * Specifies which page index to start the search from.
168
+ * Useful when delegating pagination across different sessions.
169
+ * @default 0
170
+ */
171
+ startPage?: number;
172
+ }
173
+
174
+ /**
175
+ * Options for network requests.
176
+ */
177
+ interface FetchExtractorOptions {
178
+ /** Timeout in milliseconds. Defaults vary by function (5s to 10s). */
179
+ timeout?: number;
180
+ /** Custom HTTP headers to include in the request. */
181
+ headers?: Record<string, string>;
182
+ }
183
+ /**
184
+ * Fetches only the HTTP headers for a given URL using a HEAD request.
185
+ * Useful for checking 'last-modified' without downloading the body.
186
+ *
187
+ * @param url - The URL to check.
188
+ * @param options - Request options.
189
+ * @returns The Headers object, or null on failure.
190
+ */
191
+ declare function fetchHeaders(url: string, options?: FetchExtractorOptions): Promise<Headers | null>;
192
+ /**
193
+ * Fetches a partial amount of content from a URL.
194
+ * Automatically handles character set detection from the Content-Type header.
195
+ * Aborts the request once the specified maxBytes is reached.
196
+ *
197
+ * @param url - The URL to fetch.
198
+ * @param maxBytes - The maximum number of bytes to read. Defaults to 32KB.
199
+ * @param options - Request options.
200
+ * @returns An object containing the decoded content string and the response headers.
201
+ */
202
+ declare function fetchPartial(url: string, maxBytes?: number, options?: FetchExtractorOptions): Promise<{
203
+ content: string;
204
+ headers: Headers;
205
+ } | null>;
206
+
207
+ /**
208
+ * Represents structured data extracted from an HTML document.
209
+ */
210
+ interface HtmlData {
211
+ /** Map of meta tag names/properties to their content. Keys are lowercase. */
212
+ meta: Record<string, string>;
213
+ /** Array of parsed JSON-LD objects found in the document. */
214
+ jsonLd: any[];
215
+ /** Array of data from HTML <time> tags. */
216
+ time: Array<{
217
+ /** The value of the 'datetime' attribute, if present. */
218
+ datetime: string | null;
219
+ /** The text content within the <time> tag, with HTML stripped. */
220
+ text: string;
221
+ }>;
142
222
  }
223
+ /**
224
+ * Converts a Web API Headers object into a plain JavaScript record.
225
+ * All header names are converted to lowercase for consistent access.
226
+ *
227
+ * @param headers - The Headers object to parse.
228
+ * @returns A record where keys are lowercase header names.
229
+ */
230
+ declare function parseHeaders(headers: Headers): Record<string, string>;
231
+ /**
232
+
233
+ * Parses an HTML string to extract generic metadata structures (Meta tags, JSON-LD, Time tags).
234
+
235
+ * This function does not perform field-specific logic (like finding a date); it simply
236
+
237
+ * collects available structured data.
238
+
239
+ *
240
+
241
+ * @param html - The raw HTML content to parse.
242
+
243
+ * @returns An object containing grouped metadata from the HTML.
244
+
245
+ */
246
+ declare function parseHtml(html: string): HtmlData;
247
+
248
+ /**
249
+ * Result object for generic metadata extraction.
250
+ */
251
+ interface MetadataResult {
252
+ /** The extracted and normalized date, if any. */
253
+ date?: string | null;
254
+ /** Placeholders for future metadata fields. */
255
+ [key: string]: any;
256
+ }
257
+ /**
258
+ * Supported metadata types for extraction.
259
+ */
260
+ type MetadataType = 'date' | string;
261
+ /**
262
+ * Extracts specific metadata from parsed HTML and headers based on a requested type.
263
+ * Currently supports 'date' extraction with a prioritized fallback mechanism.
264
+ *
265
+ * @param result - An object containing the raw HTML content and response headers.
266
+ * @param type - The type of metadata to extract.
267
+ * @returns The extracted and normalized value, or null if not found.
268
+ */
269
+ declare function extractMetadataFrom(result: {
270
+ content: string;
271
+ headers: Headers;
272
+ }, type: MetadataType): string | null;
273
+
274
+ /**
275
+ * Normalizes a date string into a standard ISO 8601 format (UTC).
276
+ * It handles various formats (YYYY-MM-DD, RFC2822, etc.) and performs
277
+ * aggressive cleaning and sanity checks.
278
+ *
279
+ * @param dateStr - The raw date string to normalize.
280
+ * @returns An ISO 8601 string (e.g., "2024-01-20T00:00:00.000Z") or null if invalid.
281
+ */
282
+ declare function normalizeDate(dateStr: string | null): string | null;
283
+
284
+ /**
285
+ * Options for the extractDate function.
286
+ */
287
+ interface ExtractOptions extends FetchExtractorOptions {
288
+ /**
289
+ * Maximum number of bytes to download from the URL.
290
+ * Defaults to 32768 (32KB), which is usually enough for the HTML <head>.
291
+ */
292
+ maxBytes?: number;
293
+ }
294
+ /**
295
+ * High-level convenience function to extract the publication or modification date from a URL.
296
+ * It performs a partial fetch of the content and applies multiple extraction rules
297
+ * (LD+JSON, Meta tags, Time tags, Headers) to find the most reliable date.
298
+ *
299
+ * @param url - The web page URL to analyze.
300
+ * @param options - Fetch and extraction options.
301
+ * @returns An ISO 8601 date string, or null if no valid date could be found.
302
+ *
303
+ * @example
304
+ * ```ts
305
+ * const date = await extractDate('https://example.com/article');
306
+ * console.log(date); // "2024-01-20T12:00:00.000Z"
307
+ * ```
308
+ */
309
+ declare function extractDate(url: string, options?: ExtractOptions): Promise<string | null>;
310
+
311
+ interface VerifiedUrl {
312
+ url: string;
313
+ latency: number;
314
+ }
315
+ /**
316
+ * A general utility to test a list of URLs for availability and latency.
317
+ * Returns a list of verified URLs sorted by response time.
318
+ */
319
+ declare function testUrlsByLatency(urls: string[], options?: {
320
+ timeout?: number;
321
+ limit?: number;
322
+ testPath?: string;
323
+ }): Promise<VerifiedUrl[]>;
143
324
 
144
325
  /**
145
326
  * Constructor definition for Searcher subclasses.
146
327
  */
147
328
  type SearcherConstructor = new (options?: FetcherOptions) => WebSearcher;
329
+
148
330
  /**
149
331
  * The abstract base class for all search engines.
150
332
  *
@@ -176,6 +358,10 @@ declare abstract class WebSearcher extends FetchSession {
176
358
  * Useful for registering shorthand names (e.g., 'g' for 'Google').
177
359
  */
178
360
  static alias?: string | string[];
361
+ /** Default base URLs for engines that support multiple instances. */
362
+ static defaultBaseUrls?: string[];
363
+ /** Globally shared index for tracking the currently active instance (node) across sessions. */
364
+ static currentInstanceIndex?: number;
179
365
  /**
180
366
  * Registers a search engine class.
181
367
  *
@@ -219,23 +405,25 @@ declare abstract class WebSearcher extends FetchSession {
219
405
  */
220
406
  static setAliases: (ctor: typeof WebSearcher, ...aliases: string[]) => void;
221
407
  /**
222
- * Static helper to execute a one-off search.
408
+ * Static helper to execute a one-off search or a fallback chain.
223
409
  *
224
- * It creates an instance of the specified engine, executes the search, and then
225
- * automatically disposes of the session.
410
+ * It creates an instance of the specified engine(s), executes the search, and automatically
411
+ * falls back to the next engine in the list if the current one fails or is exhausted.
226
412
  *
227
- * @param engineName - The name of the engine to use (e.g., 'Google').
413
+ * @param engineNames - The name(s) of the engine(s) to use (e.g., 'Google' or ['SearXNG', 'Google']).
228
414
  * @param query - The search query string.
229
415
  * @param options - Combined search options and fetcher options.
230
416
  * @returns A promise resolving to an array of standardized search results.
231
417
  */
232
- static search(engineName: string, query: string, options?: SearchOptions & FetcherOptions): Promise<StandardSearchResult[]>;
418
+ static search(engineNames: string | string[], query: string, options?: SearchOptions & FetcherOptions): Promise<StandardSearchResult[]>;
233
419
  /**
234
420
  * The declarative template for the fetch options.
235
421
  *
236
- * Subclasses **must** implement this getter to provide the engine configuration,
422
+ * Subclasses can implement this getter to provide the engine configuration,
237
423
  * including the base URL, search parameters pattern, and extraction rules.
238
424
  *
425
+ * This getter is **optional** if you override {@link getTemplate}.
426
+ *
239
427
  * Supports variable injection using syntax like `${query}`, `${offset}`, etc.
240
428
  *
241
429
  * @example
@@ -248,7 +436,7 @@ declare abstract class WebSearcher extends FetchSession {
248
436
  * }
249
437
  * ```
250
438
  */
251
- abstract get template(): FetcherOptions;
439
+ get template(): FetcherOptions;
252
440
  /**
253
441
  * Optional pagination configuration.
254
442
  * Defines how the searcher navigates to subsequent pages.
@@ -256,18 +444,39 @@ declare abstract class WebSearcher extends FetchSession {
256
444
  * If undefined, the searcher will only fetch the first page.
257
445
  */
258
446
  get pagination(): PaginationConfig | undefined;
447
+ /**
448
+ * Dynamically retrieves the fetch template based on current variables and search options.
449
+ *
450
+ * Subclasses can override this method to return different extraction rules (actions)
451
+ * or URL patterns based on the search category, region, or other parameters.
452
+ *
453
+ * @param variables - The calculated variables (from formatOptions, pagination, etc.).
454
+ * @param options - The original search options provided by the user.
455
+ * @returns The fetcher configuration to be used for the current request.
456
+ */
457
+ protected getTemplate(variables: Record<string, any>, options: SearchOptions): FetcherOptions;
259
458
  protected createContext(options?: FetcherOptions): _isdk_web_fetcher.FetchContext;
260
459
  /**
261
460
  * Executes a search query.
262
461
  *
263
- * This method handles the pagination loop, variable injection, fetching,
264
- * and result transformation.
462
+ * This method handles the pagination loop, multi-instance failover, variable injection,
463
+ * fetching, and result transformation.
265
464
  *
266
465
  * @param query - The search query string.
267
466
  * @param options - Optional search parameters (e.g., limit, timeRange).
268
467
  * @returns A promise resolving to an array of standardized search results.
269
468
  */
270
469
  search(query: string, options?: SearchOptions): Promise<StandardSearchResult[]>;
470
+ /**
471
+ * Hook for subclasses to validate fetched results before they are accepted.
472
+ * If this returns false, the instance manager will consider the fetch a failure
473
+ * and automatically switch to the next available baseUrl (if any).
474
+ *
475
+ * @param results - The extracted results.
476
+ * @param context - Context including the current baseUrl and page.
477
+ * @returns A promise resolving to true if valid, false otherwise.
478
+ */
479
+ protected validateFetchResult(results: StandardSearchResult[], context: SearchContext): Promise<boolean>;
271
480
  /**
272
481
  * Transform and clean the raw extracted results.
273
482
  *
@@ -347,4 +556,4 @@ declare class GoogleSearcher extends WebSearcher {
347
556
  protected transform(outputs: Record<string, any>): Promise<any[]>;
348
557
  }
349
558
 
350
- export { type CustomTimeRange, GoogleSearcher, type PaginationConfig, type SafeSearchLevel, type SearchCategory, type SearchContext, type SearchOptions, type SearchTimeRange, type SearchTimeRangePreset, type SearcherConstructor, type StandardSearchResult, WebSearcher };
559
+ export { type CustomTimeRange, type ExtractOptions, type FetchExtractorOptions, GoogleSearcher, type HtmlData, type MetadataResult, type MetadataType, type PaginationConfig, type SafeSearchLevel, type SearchCategory, type SearchContext, type SearchOptions, type SearchTimeRange, type SearchTimeRangePreset, type SearcherConstructor, type StandardSearchResult, type VerifiedUrl, WebSearcher, extractDate, extractMetadataFrom, fetchHeaders, fetchPartial, normalizeDate, parseHeaders, parseHtml, testUrlsByLatency };
package/dist/index.d.ts CHANGED
@@ -1,5 +1,6 @@
1
1
  import * as _isdk_web_fetcher from '@isdk/web-fetcher';
2
2
  import { FetcherOptions, FetchSession } from '@isdk/web-fetcher';
3
+ export { FetcherOptions } from '@isdk/web-fetcher';
3
4
  import { IBaseFactoryOptions } from 'custom-factory';
4
5
 
5
6
  /**
@@ -83,8 +84,14 @@ interface SearchContext {
83
84
  page: number;
84
85
  /** The requested limit of results. */
85
86
  limit?: number;
87
+ /** Allows for custom variables passed via search options. */
88
+ [key: string]: any;
89
+ /** The baseUrl used for this specific fetch (if multi-instance is enabled) */
90
+ baseUrl?: string;
91
+ /** The name of the engine executing the search */
92
+ engine?: string;
86
93
  }
87
- type SearchTimeRangePreset = 'all' | 'day' | 'week' | 'month' | 'year';
94
+ type SearchTimeRangePreset = 'all' | 'hour' | 'day' | 'week' | 'month' | 'year';
88
95
  interface CustomTimeRange {
89
96
  /** Start date (Date object or string like 'YYYY-MM-DD'). */
90
97
  from: Date | string;
@@ -92,7 +99,7 @@ interface CustomTimeRange {
92
99
  to?: Date | string;
93
100
  }
94
101
  type SearchTimeRange = SearchTimeRangePreset | CustomTimeRange;
95
- type SearchCategory = 'all' | 'images' | 'videos' | 'news';
102
+ type SearchCategory = 'all' | 'images' | 'videos' | 'news' | string;
96
103
  type SafeSearchLevel = 'off' | 'moderate' | 'strict';
97
104
  /**
98
105
  * Options provided when executing a search.
@@ -139,12 +146,187 @@ interface SearchOptions {
139
146
  transform?: (results: StandardSearchResult[], context: SearchContext) => Promise<StandardSearchResult[]> | StandardSearchResult[];
140
147
  /** Any other custom variables to be injected into the template. */
141
148
  [key: string]: any;
149
+ /**
150
+ * Allows the user to dynamically specify or override the base URLs for the engines.
151
+ * Can be an array of URLs for a single engine, or a map of engine names to URL arrays.
152
+ */
153
+ baseUrls?: string[] | Record<string, string[]>;
154
+ /**
155
+ * User-defined callback to validate the fetched results for a page.
156
+ * If it returns false, the fetch is considered a failure, triggering the retry/failover mechanism.
157
+ */
158
+ validator?: (results: StandardSearchResult[], context: SearchContext) => boolean | Promise<boolean>;
159
+ /**
160
+ * If true (default), the searcher will attempt to fulfill the requested `limit`
161
+ * by falling back to subsequent engines in the chain if previous ones are exhausted.
162
+ * If false, it will stop after the first successful engine regardless of whether
163
+ * the limit was reached.
164
+ */
165
+ fillLimit?: boolean;
166
+ /**
167
+ * Specifies which page index to start the search from.
168
+ * Useful when delegating pagination across different sessions.
169
+ * @default 0
170
+ */
171
+ startPage?: number;
172
+ }
173
+
174
+ /**
175
+ * Options for network requests.
176
+ */
177
+ interface FetchExtractorOptions {
178
+ /** Timeout in milliseconds. Defaults vary by function (5s to 10s). */
179
+ timeout?: number;
180
+ /** Custom HTTP headers to include in the request. */
181
+ headers?: Record<string, string>;
182
+ }
183
+ /**
184
+ * Fetches only the HTTP headers for a given URL using a HEAD request.
185
+ * Useful for checking 'last-modified' without downloading the body.
186
+ *
187
+ * @param url - The URL to check.
188
+ * @param options - Request options.
189
+ * @returns The Headers object, or null on failure.
190
+ */
191
+ declare function fetchHeaders(url: string, options?: FetchExtractorOptions): Promise<Headers | null>;
192
+ /**
193
+ * Fetches a partial amount of content from a URL.
194
+ * Automatically handles character set detection from the Content-Type header.
195
+ * Aborts the request once the specified maxBytes is reached.
196
+ *
197
+ * @param url - The URL to fetch.
198
+ * @param maxBytes - The maximum number of bytes to read. Defaults to 32KB.
199
+ * @param options - Request options.
200
+ * @returns An object containing the decoded content string and the response headers.
201
+ */
202
+ declare function fetchPartial(url: string, maxBytes?: number, options?: FetchExtractorOptions): Promise<{
203
+ content: string;
204
+ headers: Headers;
205
+ } | null>;
206
+
207
+ /**
208
+ * Represents structured data extracted from an HTML document.
209
+ */
210
+ interface HtmlData {
211
+ /** Map of meta tag names/properties to their content. Keys are lowercase. */
212
+ meta: Record<string, string>;
213
+ /** Array of parsed JSON-LD objects found in the document. */
214
+ jsonLd: any[];
215
+ /** Array of data from HTML <time> tags. */
216
+ time: Array<{
217
+ /** The value of the 'datetime' attribute, if present. */
218
+ datetime: string | null;
219
+ /** The text content within the <time> tag, with HTML stripped. */
220
+ text: string;
221
+ }>;
142
222
  }
223
+ /**
224
+ * Converts a Web API Headers object into a plain JavaScript record.
225
+ * All header names are converted to lowercase for consistent access.
226
+ *
227
+ * @param headers - The Headers object to parse.
228
+ * @returns A record where keys are lowercase header names.
229
+ */
230
+ declare function parseHeaders(headers: Headers): Record<string, string>;
231
+ /**
232
+
233
+ * Parses an HTML string to extract generic metadata structures (Meta tags, JSON-LD, Time tags).
234
+
235
+ * This function does not perform field-specific logic (like finding a date); it simply
236
+
237
+ * collects available structured data.
238
+
239
+ *
240
+
241
+ * @param html - The raw HTML content to parse.
242
+
243
+ * @returns An object containing grouped metadata from the HTML.
244
+
245
+ */
246
+ declare function parseHtml(html: string): HtmlData;
247
+
248
+ /**
249
+ * Result object for generic metadata extraction.
250
+ */
251
+ interface MetadataResult {
252
+ /** The extracted and normalized date, if any. */
253
+ date?: string | null;
254
+ /** Placeholders for future metadata fields. */
255
+ [key: string]: any;
256
+ }
257
+ /**
258
+ * Supported metadata types for extraction.
259
+ */
260
+ type MetadataType = 'date' | string;
261
+ /**
262
+ * Extracts specific metadata from parsed HTML and headers based on a requested type.
263
+ * Currently supports 'date' extraction with a prioritized fallback mechanism.
264
+ *
265
+ * @param result - An object containing the raw HTML content and response headers.
266
+ * @param type - The type of metadata to extract.
267
+ * @returns The extracted and normalized value, or null if not found.
268
+ */
269
+ declare function extractMetadataFrom(result: {
270
+ content: string;
271
+ headers: Headers;
272
+ }, type: MetadataType): string | null;
273
+
274
+ /**
275
+ * Normalizes a date string into a standard ISO 8601 format (UTC).
276
+ * It handles various formats (YYYY-MM-DD, RFC2822, etc.) and performs
277
+ * aggressive cleaning and sanity checks.
278
+ *
279
+ * @param dateStr - The raw date string to normalize.
280
+ * @returns An ISO 8601 string (e.g., "2024-01-20T00:00:00.000Z") or null if invalid.
281
+ */
282
+ declare function normalizeDate(dateStr: string | null): string | null;
283
+
284
+ /**
285
+ * Options for the extractDate function.
286
+ */
287
+ interface ExtractOptions extends FetchExtractorOptions {
288
+ /**
289
+ * Maximum number of bytes to download from the URL.
290
+ * Defaults to 32768 (32KB), which is usually enough for the HTML <head>.
291
+ */
292
+ maxBytes?: number;
293
+ }
294
+ /**
295
+ * High-level convenience function to extract the publication or modification date from a URL.
296
+ * It performs a partial fetch of the content and applies multiple extraction rules
297
+ * (LD+JSON, Meta tags, Time tags, Headers) to find the most reliable date.
298
+ *
299
+ * @param url - The web page URL to analyze.
300
+ * @param options - Fetch and extraction options.
301
+ * @returns An ISO 8601 date string, or null if no valid date could be found.
302
+ *
303
+ * @example
304
+ * ```ts
305
+ * const date = await extractDate('https://example.com/article');
306
+ * console.log(date); // "2024-01-20T12:00:00.000Z"
307
+ * ```
308
+ */
309
+ declare function extractDate(url: string, options?: ExtractOptions): Promise<string | null>;
310
+
311
+ interface VerifiedUrl {
312
+ url: string;
313
+ latency: number;
314
+ }
315
+ /**
316
+ * A general utility to test a list of URLs for availability and latency.
317
+ * Returns a list of verified URLs sorted by response time.
318
+ */
319
+ declare function testUrlsByLatency(urls: string[], options?: {
320
+ timeout?: number;
321
+ limit?: number;
322
+ testPath?: string;
323
+ }): Promise<VerifiedUrl[]>;
143
324
 
144
325
  /**
145
326
  * Constructor definition for Searcher subclasses.
146
327
  */
147
328
  type SearcherConstructor = new (options?: FetcherOptions) => WebSearcher;
329
+
148
330
  /**
149
331
  * The abstract base class for all search engines.
150
332
  *
@@ -176,6 +358,10 @@ declare abstract class WebSearcher extends FetchSession {
176
358
  * Useful for registering shorthand names (e.g., 'g' for 'Google').
177
359
  */
178
360
  static alias?: string | string[];
361
+ /** Default base URLs for engines that support multiple instances. */
362
+ static defaultBaseUrls?: string[];
363
+ /** Globally shared index for tracking the currently active instance (node) across sessions. */
364
+ static currentInstanceIndex?: number;
179
365
  /**
180
366
  * Registers a search engine class.
181
367
  *
@@ -219,23 +405,25 @@ declare abstract class WebSearcher extends FetchSession {
219
405
  */
220
406
  static setAliases: (ctor: typeof WebSearcher, ...aliases: string[]) => void;
221
407
  /**
222
- * Static helper to execute a one-off search.
408
+ * Static helper to execute a one-off search or a fallback chain.
223
409
  *
224
- * It creates an instance of the specified engine, executes the search, and then
225
- * automatically disposes of the session.
410
+ * It creates an instance of the specified engine(s), executes the search, and automatically
411
+ * falls back to the next engine in the list if the current one fails or is exhausted.
226
412
  *
227
- * @param engineName - The name of the engine to use (e.g., 'Google').
413
+ * @param engineNames - The name(s) of the engine(s) to use (e.g., 'Google' or ['SearXNG', 'Google']).
228
414
  * @param query - The search query string.
229
415
  * @param options - Combined search options and fetcher options.
230
416
  * @returns A promise resolving to an array of standardized search results.
231
417
  */
232
- static search(engineName: string, query: string, options?: SearchOptions & FetcherOptions): Promise<StandardSearchResult[]>;
418
+ static search(engineNames: string | string[], query: string, options?: SearchOptions & FetcherOptions): Promise<StandardSearchResult[]>;
233
419
  /**
234
420
  * The declarative template for the fetch options.
235
421
  *
236
- * Subclasses **must** implement this getter to provide the engine configuration,
422
+ * Subclasses can implement this getter to provide the engine configuration,
237
423
  * including the base URL, search parameters pattern, and extraction rules.
238
424
  *
425
+ * This getter is **optional** if you override {@link getTemplate}.
426
+ *
239
427
  * Supports variable injection using syntax like `${query}`, `${offset}`, etc.
240
428
  *
241
429
  * @example
@@ -248,7 +436,7 @@ declare abstract class WebSearcher extends FetchSession {
248
436
  * }
249
437
  * ```
250
438
  */
251
- abstract get template(): FetcherOptions;
439
+ get template(): FetcherOptions;
252
440
  /**
253
441
  * Optional pagination configuration.
254
442
  * Defines how the searcher navigates to subsequent pages.
@@ -256,18 +444,39 @@ declare abstract class WebSearcher extends FetchSession {
256
444
  * If undefined, the searcher will only fetch the first page.
257
445
  */
258
446
  get pagination(): PaginationConfig | undefined;
447
+ /**
448
+ * Dynamically retrieves the fetch template based on current variables and search options.
449
+ *
450
+ * Subclasses can override this method to return different extraction rules (actions)
451
+ * or URL patterns based on the search category, region, or other parameters.
452
+ *
453
+ * @param variables - The calculated variables (from formatOptions, pagination, etc.).
454
+ * @param options - The original search options provided by the user.
455
+ * @returns The fetcher configuration to be used for the current request.
456
+ */
457
+ protected getTemplate(variables: Record<string, any>, options: SearchOptions): FetcherOptions;
259
458
  protected createContext(options?: FetcherOptions): _isdk_web_fetcher.FetchContext;
260
459
  /**
261
460
  * Executes a search query.
262
461
  *
263
- * This method handles the pagination loop, variable injection, fetching,
264
- * and result transformation.
462
+ * This method handles the pagination loop, multi-instance failover, variable injection,
463
+ * fetching, and result transformation.
265
464
  *
266
465
  * @param query - The search query string.
267
466
  * @param options - Optional search parameters (e.g., limit, timeRange).
268
467
  * @returns A promise resolving to an array of standardized search results.
269
468
  */
270
469
  search(query: string, options?: SearchOptions): Promise<StandardSearchResult[]>;
470
+ /**
471
+ * Hook for subclasses to validate fetched results before they are accepted.
472
+ * If this returns false, the instance manager will consider the fetch a failure
473
+ * and automatically switch to the next available baseUrl (if any).
474
+ *
475
+ * @param results - The extracted results.
476
+ * @param context - Context including the current baseUrl and page.
477
+ * @returns A promise resolving to true if valid, false otherwise.
478
+ */
479
+ protected validateFetchResult(results: StandardSearchResult[], context: SearchContext): Promise<boolean>;
271
480
  /**
272
481
  * Transform and clean the raw extracted results.
273
482
  *
@@ -347,4 +556,4 @@ declare class GoogleSearcher extends WebSearcher {
347
556
  protected transform(outputs: Record<string, any>): Promise<any[]>;
348
557
  }
349
558
 
350
- export { type CustomTimeRange, GoogleSearcher, type PaginationConfig, type SafeSearchLevel, type SearchCategory, type SearchContext, type SearchOptions, type SearchTimeRange, type SearchTimeRangePreset, type SearcherConstructor, type StandardSearchResult, WebSearcher };
559
+ export { type CustomTimeRange, type ExtractOptions, type FetchExtractorOptions, GoogleSearcher, type HtmlData, type MetadataResult, type MetadataType, type PaginationConfig, type SafeSearchLevel, type SearchCategory, type SearchContext, type SearchOptions, type SearchTimeRange, type SearchTimeRangePreset, type SearcherConstructor, type StandardSearchResult, type VerifiedUrl, WebSearcher, extractDate, extractMetadataFrom, fetchHeaders, fetchPartial, normalizeDate, parseHeaders, parseHtml, testUrlsByLatency };
package/dist/index.js CHANGED
@@ -1 +1 @@
1
- "use strict";var t,e=Object.defineProperty,r=Object.getOwnPropertyDescriptor,s=Object.getOwnPropertyNames,a=Object.prototype.hasOwnProperty,i={};((t,r)=>{for(var s in r)e(t,s,{get:r[s],enumerable:!0})})(i,{GoogleSearcher:()=>f,WebSearcher:()=>h}),module.exports=(t=i,((t,i,n,o)=>{if(i&&"object"==typeof i||"function"==typeof i)for(let c of s(i))a.call(t,c)||c===n||e(t,c,{get:()=>i[c],enumerable:!(o=r(i,c))||o.enumerable});return t})(e({},"__esModule",{value:!0}),t));var n=require("@isdk/web-fetcher"),o=require("custom-factory"),c=require("lodash-es");function l(t,e){if("string"==typeof t)return t.replace(/\$\{(.*?)\}/g,(t,r)=>{const s=e[r.trim()];return void 0!==s?String(s):""});if(Array.isArray(t))return t.map(t=>l(t,e));if((0,c.isPlainObject)(t)){const r={};for(const s in t)Object.prototype.hasOwnProperty.call(t,s)&&(r[s]=l(t[s],e));return r}return t}var u=require("lodash-es"),h=class extends n.FetchSession{static async search(t,e,r={}){const s=this.createObject(t,r);if(!s)throw new Error(`Search engine not found: ${t}`);try{return await s.search(e,r)}finally{await s.dispose()}}get pagination(){}createContext(t=this.options){const e=this.template,r=(0,u.defaultsDeep)({},e,t);return e.engine&&"auto"!==e.engine||!t.engine||(r.engine=t.engine),super.createContext(r)}async search(t,e={}){const r=e.limit||10,s=[];let a=0;const i=this.pagination?.startValue??0,n=this.pagination?.increment??1,o=e.maxPages||this.pagination?.maxPages||10;for(;s.length<r;){const c=this.formatOptions(e),h=i+a*n,f={...e,...c,query:t,page:a+i,offset:h,limit:r},m=l(this.template,f),d=(0,u.defaultsDeep)({},m,e),g=[];if(0===a||"url-param"===this.pagination?.type?d.url&&g.push({id:"goto",params:{url:d.url}}):"click-next"===this.pagination?.type&&this.pagination.nextButtonSelector&&(g.push({id:"click",params:{selector:this.pagination.nextButtonSelector}}),g.push({id:"waitFor",params:{networkIdle:!0,ms:500}})),d.actions){const 
t=d.actions.filter(t=>!(g.length>0&&"goto"===g[0].id&&"goto"===t.id));g.push(...t)}d.engine&&this.context.engine!==d.engine&&d.engine;const{outputs:p}=await this.executeAll(g),w={query:t,page:a,limit:e.limit};let y=[];if(y=await this.transform(p,w),e.transform&&(y=await e.transform(y,w)),!y||0===y.length)break;if(s.push(...y),s.length>=r||!this.pagination)break;if(a++,a>=o)break}return s.slice(0,r)}async transform(t,e){return t.results||[]}formatOptions(t){return{...t}}};h._isFactory=!1,(0,o.addBaseFactoryAbility)(h),h.prototype.name="Searcher";var f=class extends h{get template(){return{engine:"browser",browser:{headless:!1},url:"https://www.google.com/search?q=${query}&start=${offset}&tbs=${tbs}&tbm=${tbm}&gl=${gl}&hl=${hl}&safe=${safe}",actions:[{id:"extract",storeAs:"results",params:{type:"array",selector:"#main #search",items:{url:{selector:"a:has(h3)",attribute:"href",required:!0},title:{selector:"a:has(h3) h3",required:!0,mode:"innerText"},snippet:{selector:"div[style*='-webkit-line-clamp']",type:"html"}}}}]}}get pagination(){return{type:"url-param",paramName:"start",startValue:0,increment:10}}formatOptions(t){const e={};if(t.timeRange)if("string"==typeof t.timeRange){const r={day:"qdr:d",week:"qdr:w",month:"qdr:m",year:"qdr:y"};r[t.timeRange]&&(e.tbs=r[t.timeRange])}else{const r=new Date(t.timeRange.from),s=t.timeRange.to?new Date(t.timeRange.to):new Date;if(!isNaN(r.getTime())&&!isNaN(s.getTime())){const t=t=>`${t.getMonth()+1}/${t.getDate()}/${t.getFullYear()}`;e.tbs=`cdr:1,cd_min:${t(r)},cd_max:${t(s)}`}}if(t.category){const r={images:"isch",videos:"vid",news:"nws"};r[t.category]&&(e.tbm=r[t.category])}return t.region&&(e.gl=t.region),t.language&&(e.hl=t.language),t.safeSearch&&("strict"===t.safeSearch?e.safe="active":"off"===t.safeSearch&&(e.safe="images")),e}async transform(t){const e=t.results||[];return Array.isArray(e)?e.map(t=>{if(t.url&&t.url.startsWith("/url?q="))try{const e=new 
URL(t.url,"https://www.google.com").searchParams.get("q");e&&(t.url=e)}catch(t){}return t}):[]}};f.alias=["google"];
1
+ "use strict";var t,e=Object.defineProperty,r=Object.getOwnPropertyDescriptor,n=Object.getOwnPropertyNames,s=Object.prototype.hasOwnProperty,i={};async function a(t,e={}){const{timeout:r=5e3,headers:n}=e,s=new AbortController,i=setTimeout(()=>s.abort(),r);try{return(await fetch(t,{method:"HEAD",signal:s.signal,headers:{"User-Agent":"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36",...n}})).headers}catch(t){return null}finally{clearTimeout(i)}}async function o(t,e=32768,r={}){const{timeout:n=1e4,headers:s}=r,i=new AbortController,a=setTimeout(()=>i.abort(),n);let o="",c=new Headers;try{const r=await fetch(t,{signal:i.signal,headers:{"User-Agent":"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36",...s}});if(c=r.headers,!r.ok||!r.body)return null;const n=r.headers.get("content-type"),a=n?.match(/charset=([\w-]+)/i),l=a?a[1]:"utf-8",u=r.body.getReader(),f=new TextDecoder(l);let d=0;for(;;)try{const{done:t,value:r}=await u.read();if(t)break;if(d+=r.length,o+=f.decode(r,{stream:!0}),d>=e){i.abort();break}}catch(t){if("AbortError"===t.name)break;throw t}return{content:o,headers:c}}catch(t){return o.length>0?{content:o,headers:c}:null}finally{clearTimeout(a)}}function c(t){const e={};return t.forEach((t,r)=>{e[r.toLowerCase()]=t}),e}function l(t){const e={meta:{},jsonLd:[],time:[]},r=/<meta\s+([^>]+?)>/gi;let n;for(;null!==(n=r.exec(t));){const t=f(n[1]),r=t.name||t.property||t.itemprop,s=t.content;r&&s&&(e.meta[r.toLowerCase()]=s)}const s=/<script\s+[^>]*?type\s*=\s*["']application\/ld\+json["'][^>]*>([\s\S]*?)<\/script>/gi;for(;null!==(n=s.exec(t));){const t=n[1];try{const r=JSON.parse(t);e.jsonLd.push(r)}catch(r){const n=u(t);n&&e.jsonLd.push(n)}}const i=/<time([^>]*?)>([\s\S]*?)<\/time>/gi;for(;null!==(n=i.exec(t));){const t=f(n[1]).datetime,r=n[2].replace(/<[^>]*>/g,"").trim();e.time.push({datetime:t,text:r})}return e}function u(t){const 
e=["datePublished","dateModified","pubDate","publishedAt"],r={};let n=!1;for(const s of e){const e=new RegExp(`"${s}"\\s*:\\s*"([^"]+)"`,"i"),i=t.match(e);i&&(r[s]=i[1],n=!0)}return n?r:null}function f(t){const e={},r=/([a-z0-9:._-]+)(?:\s*=\s*(?:(?:"([^"]*)")|(?:'([^']*)')|([^>\s]+)))?/gi;let n;for(;null!==(n=r.exec(t));){const t=n[1].toLowerCase(),r=n[2]??n[3]??n[4]??"";e[t]=r}return e}function d(t){if(!t)return null;try{let e=t.trim();if(!e)return null;e=e.replace(/^(?:last|first|posted|originally)\s*(?:published|updated|date|posted|modified)\s*(?:on|at)?[:\s]*/i,""),e=e.replace(/^(?:published|updated|date|posted|modified)\s*(?:on|at)?[:\s]*/i,""),e=e.split(/[\(|\|]|by\s+|[-–—]\s*\d+\s*min/i)[0].trim();const r=new Date(e);if(!isNaN(r.getTime())){const t=r.getUTCFullYear(),e=(new Date).getUTCFullYear();if(t>=-1e4&&t<=e+20)return r.toISOString()}}catch(t){}return null}function h(t,e){const r=l(t.content);return"date"===e?function(t,e){const r=function(t){const e=["datePublished","dateModified","pubDate","publishedAt"],r=t=>{if(!t||"object"!=typeof t)return null;for(const r of e)if("string"==typeof t[r])return t[r];if(Array.isArray(t))for(const e of t){const t=r(e);if(t)return t}else if(t["@graph"]&&Array.isArray(t["@graph"]))return r(t["@graph"]);return null};return r(t)}(t.jsonLd),n=d(r);if(n)return n;const s=function(t){const e=["article:published_time","og:published_time","datepublished","date","pubdate","publishdate","dc.date.issued","bt:pubdate","sailthru.date","article:modified_time","og:updated_time","modifieddate"];for(const r of e)if(t[r])return t[r];return null}(t.meta),i=d(s);if(i)return i;for(const e of t.time){const t=d(e.datetime||e.text);if(t)return t}const a=c(e);return d(a["last-modified"])}(r,t.headers):null}async function m(t,e={}){const r=await o(t,e.maxBytes,e);return r?h(r,"date"):null}((t,r)=>{for(var n in 
r)e(t,n,{get:r[n],enumerable:!0})})(i,{FetcherOptions:()=>y.FetcherOptions,GoogleSearcher:()=>A,WebSearcher:()=>q,extractDate:()=>m,extractMetadataFrom:()=>h,fetchHeaders:()=>a,fetchPartial:()=>o,normalizeDate:()=>d,parseHeaders:()=>c,parseHtml:()=>l,testUrlsByLatency:()=>b}),module.exports=(t=i,((t,i,a,o)=>{if(i&&"object"==typeof i||"function"==typeof i)for(let c of n(i))s.call(t,c)||c===a||e(t,c,{get:()=>i[c],enumerable:!(o=r(i,c))||o.enumerable});return t})(e({},"__esModule",{value:!0}),t));var p=require("@isdk/web-fetcher");async function b(t,e={}){const{timeout:r=5e3,limit:n=20,testPath:s=""}=e;return(await Promise.all(t.map(async t=>{const e=Date.now();try{const n=s?(t.endsWith("/")?t.slice(0,-1):t)+(s.startsWith("/")?s:"/"+s):t;return await(0,p.fetchWeb)(n,{timeoutMs:r}),{url:t,latency:Date.now()-e}}catch(t){return null}}))).filter(t=>null!==t).sort((t,e)=>t.latency-e.latency).slice(0,n)}var y=require("@isdk/web-fetcher"),w=require("custom-factory"),g=require("lodash-es");function k(t,e){if("string"==typeof t)return t.replace(/\$\{(.*?)\}/g,(t,r)=>{const n=e[r.trim()];return void 0!==n?String(n):""});if(Array.isArray(t))return t.map(t=>k(t,e));if((0,g.isPlainObject)(t)){const r={};for(const n in t)Object.prototype.hasOwnProperty.call(t,n)&&(r[n]=k(t[n],e));return r}return t}var $=require("lodash-es"),q=class extends y.FetchSession{static async search(t,e,r={}){const n=Array.isArray(t)?t:[t],s=r.limit||10,i=r.fillLimit??!0,a=[];for(let t=0;t<n.length;t++){const o=n[t];if(a.length>=s)break;const c=s-a.length,l={...r,limit:c},u=this.createObject(o,l);if(!u)throw new Error(`Search engine not found: ${o}`);try{const t=await u.search(e,l);for(const e of t)e.url&&!a.some(t=>t.url===e.url)&&a.push(e);if(a.length>=s)break;if(!1===i)break}catch(e){if(console.warn(`[WebSearcher] Engine '${o}' failed completely:`,e),t===n.length-1&&0===a.length)throw e}finally{await u.dispose()}}return a}get template(){return{}}get 
pagination(){}getTemplate(t,e){return(0,$.cloneDeep)(this.template)}createContext(t=this.options){const{actions:e,...r}=this.template,n=(0,$.defaultsDeep)({},r,t);return r.engine&&"auto"!==r.engine||!t.engine||(n.engine=t.engine),super.createContext(n)}async search(t,e={}){const r=e.limit||10,n=[],s=new Set;let i=e.startPage||0;const a=this.pagination?.startValue??0,o=this.pagination?.increment??1,c=e.maxPages||this.pagination?.maxPages||10,l=this.constructor.name;let u;e.baseUrls&&(Array.isArray(e.baseUrls)?u=e.baseUrls:"object"==typeof e.baseUrls&&(u=e.baseUrls[l]||e.baseUrls[this.constructor.alias?.[0]])),u&&0!==u.length||(u=this.constructor.defaultBaseUrls);const f=u&&u.length>0;let d=0;f&&"number"==typeof this.constructor.currentInstanceIndex&&(d=this.constructor.currentInstanceIndex);let h=!1;for(;n.length<r;){let m=!1,p=null;const b=f?u.length:1;let y=0;for(;y<b;){const c=f?u[d]:void 0,b=this.formatOptions(e),w=a+i*o,g={...e,...b,query:t,page:i+a,offset:w,limit:r,baseUrl:c?.endsWith("/")?c.slice(0,-1):c},q=k(this.getTemplate(g,e),g),{actions:A,...v}=e,x=(0,$.defaultsDeep)({},q,v),D=[],S=x.actions||[];if(i===(e.startPage||0)||"url-param"===this.pagination?.type){if(x.url){S.some(t=>"goto"===(t.id??t.name??t.action)&&t.params?.url===x.url)||D.push({id:"goto",params:{url:x.url}})}}else"click-next"===this.pagination?.type&&this.pagination.nextButtonSelector&&(D.push({id:"click",params:{selector:this.pagination.nextButtonSelector}}),D.push({id:"waitFor",params:{networkIdle:!0,ms:500}}));D.push(...S),x.engine&&this.context.engine!==x.engine&&x.engine;try{const{outputs:r}=await this.executeAll(D,e),a={...e,query:t,page:i,baseUrl:c,engine:l};let o=await this.transform(r,a);e.transform&&(o=await e.transform(o,a));let u=!0;if(this.validateFetchResult&&(u=await this.validateFetchResult(o,a)),u&&e.validator&&(u=await e.validator(o,a)),!u)throw new Error(`Results validation failed for engine: ${l}, url: ${c}`);if(o&&0!==o.length)for(const t of 
o)t.url&&!s.has(t.url)&&(s.add(t.url),n.push(t));else h=!0;m=!0;break}catch(t){p=t,f&&(d=(d+1)%u.length,this.constructor.currentInstanceIndex=d),y++}}if(!m)throw p||new Error(`All instances failed for engine: ${l}`);if(h)break;if(n.length>=r||!this.pagination)break;if(i++,i>=c)break}return n.slice(0,r)}async validateFetchResult(t,e){return!0}async transform(t,e){return t.results||[]}formatOptions(t){return{...t}}};q._isFactory=!1,(0,w.addBaseFactoryAbility)(q),q.prototype.name="Searcher";var A=class extends q{get template(){return{engine:"browser",browser:{headless:!1},url:"https://www.google.com/search?q=${query}&start=${offset}&tbs=${tbs}&tbm=${tbm}&gl=${gl}&hl=${hl}&safe=${safe}",actions:[{id:"extract",storeAs:"results",params:{type:"array",selector:"#main #search",items:{url:{selector:"a:has(h3)",attribute:"href",required:!0},title:{selector:"a:has(h3) h3",required:!0,mode:"innerText"},snippet:{selector:"div[style*='-webkit-line-clamp']",type:"html"}}}}]}}get pagination(){return{type:"url-param",paramName:"start",startValue:0,increment:10}}formatOptions(t){const e={};if(t.timeRange)if("string"==typeof t.timeRange){const r={hour:"qdr:h",day:"qdr:d",week:"qdr:w",month:"qdr:m",year:"qdr:y"};r[t.timeRange]&&(e.tbs=r[t.timeRange])}else{const r=new Date(t.timeRange.from),n=t.timeRange.to?new Date(t.timeRange.to):new Date;if(!isNaN(r.getTime())&&!isNaN(n.getTime())){const t=t=>`${t.getMonth()+1}/${t.getDate()}/${t.getFullYear()}`;e.tbs=`cdr:1,cd_min:${t(r)},cd_max:${t(n)}`}}if(t.category){const r={images:"isch",videos:"vid",news:"nws"};r[t.category]&&(e.tbm=r[t.category])}return t.region&&(e.gl=t.region),t.language&&(e.hl=t.language),t.safeSearch&&("strict"===t.safeSearch?e.safe="active":"off"===t.safeSearch&&(e.safe="images")),e}async transform(t){const e=t.results||[];return Array.isArray(e)?e.map(t=>{if(t.url&&t.url.startsWith("/url?q="))try{const e=new URL(t.url,"https://www.google.com").searchParams.get("q");e&&(t.url=e)}catch(t){}return 
t}):[]}};A.alias=["google"];