@isdk/web-searcher 0.1.4 → 0.1.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (36)
  1. package/README.cn.md +196 -7
  2. package/README.md +196 -7
  3. package/dist/index.d.mts +234 -11
  4. package/dist/index.d.ts +234 -11
  5. package/dist/index.js +1 -1
  6. package/dist/index.mjs +1 -1
  7. package/docs/README.md +196 -7
  8. package/docs/classes/GoogleSearcher.md +289 -60
  9. package/docs/classes/WebSearcher.md +264 -61
  10. package/docs/functions/extractDate.md +42 -0
  11. package/docs/functions/extractMetadataFrom.md +40 -0
  12. package/docs/functions/fetchHeaders.md +34 -0
  13. package/docs/functions/fetchPartial.md +41 -0
  14. package/docs/functions/normalizeDate.md +29 -0
  15. package/docs/functions/parseHeaders.md +28 -0
  16. package/docs/functions/parseHtml.md +31 -0
  17. package/docs/functions/testUrlsByLatency.md +42 -0
  18. package/docs/globals.md +18 -0
  19. package/docs/interfaces/CustomTimeRange.md +3 -3
  20. package/docs/interfaces/ExtractOptions.md +54 -0
  21. package/docs/interfaces/FetchExtractorOptions.md +35 -0
  22. package/docs/interfaces/FetcherOptions.md +436 -0
  23. package/docs/interfaces/HtmlData.md +53 -0
  24. package/docs/interfaces/MetadataResult.md +27 -0
  25. package/docs/interfaces/PaginationConfig.md +9 -9
  26. package/docs/interfaces/SearchContext.md +30 -4
  27. package/docs/interfaces/SearchOptions.md +77 -11
  28. package/docs/interfaces/StandardSearchResult.md +10 -10
  29. package/docs/interfaces/VerifiedUrl.md +25 -0
  30. package/docs/type-aliases/MetadataType.md +13 -0
  31. package/docs/type-aliases/SafeSearchLevel.md +1 -1
  32. package/docs/type-aliases/SearchCategory.md +2 -2
  33. package/docs/type-aliases/SearchTimeRange.md +1 -1
  34. package/docs/type-aliases/SearchTimeRangePreset.md +1 -1
  35. package/docs/type-aliases/SearcherConstructor.md +2 -2
  36. package/package.json +3 -2
package/dist/index.d.mts CHANGED
@@ -1,5 +1,6 @@
1
1
  import * as _isdk_web_fetcher from '@isdk/web-fetcher';
2
2
  import { FetcherOptions, FetchSession } from '@isdk/web-fetcher';
3
+ export { FetcherOptions } from '@isdk/web-fetcher';
3
4
  import { IBaseFactoryOptions } from 'custom-factory';
4
5
 
5
6
  /**
@@ -83,6 +84,12 @@ interface SearchContext {
83
84
  page: number;
84
85
  /** The requested limit of results. */
85
86
  limit?: number;
87
+ /** Allows for custom variables passed via search options. */
88
+ [key: string]: any;
89
+ /** The baseUrl used for this specific fetch (if multi-instance is enabled) */
90
+ baseUrl?: string;
91
+ /** The name of the engine executing the search */
92
+ engine?: string;
86
93
  }
87
94
  type SearchTimeRangePreset = 'all' | 'hour' | 'day' | 'week' | 'month' | 'year';
88
95
  interface CustomTimeRange {
@@ -92,7 +99,7 @@ interface CustomTimeRange {
92
99
  to?: Date | string;
93
100
  }
94
101
  type SearchTimeRange = SearchTimeRangePreset | CustomTimeRange;
95
- type SearchCategory = 'all' | 'images' | 'videos' | 'news';
102
+ type SearchCategory = 'all' | 'images' | 'videos' | 'news' | string;
96
103
  type SafeSearchLevel = 'off' | 'moderate' | 'strict';
97
104
  /**
98
105
  * Options provided when executing a search.
@@ -139,12 +146,188 @@ interface SearchOptions {
139
146
  transform?: (results: StandardSearchResult[], context: SearchContext) => Promise<StandardSearchResult[]> | StandardSearchResult[];
140
147
  /** Any other custom variables to be injected into the template. */
141
148
  [key: string]: any;
149
+ /**
150
+ * Allows the user to dynamically specify or override the base URLs for the engines.
151
+ * Can be an array of URLs for a single engine, or a map of engine names to URL arrays.
152
+ */
153
+ baseUrls?: string[] | Record<string, string[]>;
154
+ /**
155
+ * User-defined callback to validate the fetched results for a page.
156
+ * If it returns false, the fetch is considered a failure, triggering the retry/failover mechanism.
157
+ */
158
+ validator?: (results: StandardSearchResult[], context: SearchContext) => boolean | Promise<boolean>;
159
+ /**
160
+ * If true (default), the searcher will attempt to fulfill the requested `limit`
161
+ * by falling back to subsequent engines in the chain if previous ones are exhausted.
162
+ * If false, it will stop after the first successful engine regardless of whether
163
+ * the limit was reached.
164
+ */
165
+ fillLimit?: boolean;
166
+ /**
167
+ * Specifies which page index to start the search from.
168
+ * Useful when delegating pagination across different sessions.
169
+ * @default 0
170
+ */
171
+ startPage?: number;
172
+ }
173
+
174
+ /**
175
+ * Options for network requests.
176
+ */
177
+ interface FetchExtractorOptions {
178
+ /** Timeout in milliseconds. Defaults vary by function (5s to 10s). */
179
+ timeout?: number;
180
+ /** Custom HTTP headers to include in the request. */
181
+ headers?: Record<string, string>;
182
+ }
183
+ /**
184
+ * Fetches only the HTTP headers for a given URL using a HEAD request.
185
+ * Useful for checking 'last-modified' without downloading the body.
186
+ *
187
+ * @param url - The URL to check.
188
+ * @param options - Request options.
189
+ * @returns The Headers object, or null on failure.
190
+ */
191
+ declare function fetchHeaders(url: string, options?: FetchExtractorOptions): Promise<Headers | null>;
192
+ /**
193
+ * Fetches a partial amount of content from a URL.
194
+ * Automatically handles character set detection from the Content-Type header.
195
+ * Aborts the request once the specified maxBytes is reached.
196
+ *
197
+ * @param url - The URL to fetch.
198
+ * @param maxBytes - The maximum number of bytes to read. Defaults to 32KB.
199
+ * @param options - Request options.
200
+ * @returns An object containing the decoded content string and the response headers, or null if no result could be produced.
201
+ */
202
+ declare function fetchPartial(url: string, maxBytes?: number, options?: FetchExtractorOptions): Promise<{
203
+ content: string;
204
+ headers: Headers;
205
+ } | null>;
206
+
207
+ /**
208
+ * Represents structured data extracted from an HTML document.
209
+ */
210
+ interface HtmlData {
211
+ /** Map of meta tag names/properties to their content. Keys are lowercase. */
212
+ meta: Record<string, string>;
213
+ /** Array of parsed JSON-LD objects found in the document. */
214
+ jsonLd: any[];
215
+ /** Array of data from HTML <time> tags. */
216
+ time: Array<{
217
+ /** The value of the 'datetime' attribute, if present. */
218
+ datetime: string | null;
219
+ /** The text content within the <time> tag, with HTML stripped. */
220
+ text: string;
221
+ }>;
142
222
  }
223
+ /**
224
+ * Converts a Web API Headers object into a plain JavaScript record.
225
+ * All header names are converted to lowercase for consistent access.
226
+ *
227
+ * @param headers - The Headers object to parse.
228
+ * @returns A record where keys are lowercase header names.
229
+ */
230
+ declare function parseHeaders(headers: Headers): Record<string, string>;
231
+ /**
232
+
233
+ * Parses an HTML string to extract generic metadata structures (Meta tags, JSON-LD, Time tags).
234
+
235
+ * This function does not perform field-specific logic (like finding a date); it simply
236
+
237
+ * collects available structured data.
238
+
239
+ *
240
+
241
+ * @param html - The raw HTML content to parse.
242
+
243
+ * @returns An object containing grouped metadata from the HTML.
244
+
245
+ */
246
+ declare function parseHtml(html: string): HtmlData;
247
+
248
+ /**
249
+ * Result object for generic metadata extraction.
250
+ */
251
+ interface MetadataResult {
252
+ /** The extracted and normalized date, if any. */
253
+ date?: string | null;
254
+ /** Index signature reserved for additional metadata fields that may be added in the future. */
255
+ [key: string]: any;
256
+ }
257
+ /**
258
+ * Supported metadata types for extraction.
259
+ */
260
+ type MetadataType = 'date' | string;
261
+ /**
262
+ * Extracts specific metadata from parsed HTML and headers based on a requested type.
263
+ * Currently supports 'date' extraction with a prioritized fallback mechanism.
264
+ *
265
+ * @param result - An object containing the raw HTML content and response headers.
266
+ * @param type - The type of metadata to extract.
267
+ * @returns The extracted and normalized value, or null if not found.
268
+ */
269
+ declare function extractMetadataFrom(result: {
270
+ content: string;
271
+ headers: Headers;
272
+ }, type: MetadataType): string | null;
273
+
274
+ /**
275
+ * Normalizes a date string into a standard ISO 8601 format (UTC).
276
+ * It handles various formats (YYYY-MM-DD, RFC2822, etc.) and performs
277
+ * aggressive cleaning and sanity checks.
278
+ *
279
+ * @param dateStr - The raw date string to normalize.
280
+ * @returns An ISO 8601 string (e.g., "2024-01-20T00:00:00.000Z") or null if invalid.
281
+ */
282
+ declare function normalizeDate(dateStr: string | null): string | null;
283
+
284
+ /**
285
+ * Options for the extractDate function.
286
+ */
287
+ interface ExtractOptions extends FetchExtractorOptions {
288
+ /**
289
+ * Maximum number of bytes to download from the URL.
290
+ * Defaults to 32768 (32KB), which is usually enough for the HTML <head>.
291
+ */
292
+ maxBytes?: number;
293
+ }
294
+ /**
295
+ * High-level convenience function to extract the publication or modification date from a URL.
296
+ * It performs a partial fetch of the content and applies multiple extraction rules
297
+ * (LD+JSON, Meta tags, Time tags, Headers) to find the most reliable date.
298
+ *
299
+ * @param url - The web page URL to analyze.
300
+ * @param options - Fetch and extraction options.
301
+ * @returns An ISO 8601 date string, or null if no valid date could be found.
302
+ *
303
+ * @example
304
+ * ```ts
305
+ * const date = await extractDate('https://example.com/article');
306
+ * console.log(date); // "2024-01-20T12:00:00.000Z"
307
+ * ```
308
+ */
309
+ declare function extractDate(url: string, options?: ExtractOptions): Promise<string | null>;
310
+
311
+ interface VerifiedUrl {
312
+ url: string;
313
+ latency: number;
314
+ }
315
+ /**
316
+ * A general utility to test a list of URLs for availability and latency.
317
+ * Returns a list of verified URLs sorted by response time.
318
+ */
319
+ declare function testUrlsByLatency(urls: string[], options?: {
320
+ timeout?: number;
321
+ limit?: number;
322
+ testPath?: string;
323
+ proxy?: string;
324
+ }): Promise<VerifiedUrl[]>;
143
325
 
144
326
  /**
145
327
  * Constructor definition for Searcher subclasses.
146
328
  */
147
329
  type SearcherConstructor = new (options?: FetcherOptions) => WebSearcher;
330
+
148
331
  /**
149
332
  * The abstract base class for all search engines.
150
333
  *
@@ -176,6 +359,23 @@ declare abstract class WebSearcher extends FetchSession {
176
359
  * Useful for registering shorthand names (e.g., 'g' for 'Google').
177
360
  */
178
361
  static alias?: string | string[];
362
+ /** Default base URLs for engines that support multiple instances. */
363
+ static defaultBaseUrls?: string[];
364
+ /** Globally shared index for tracking the currently active instance (node) across sessions. */
365
+ static currentInstanceIndex?: number;
366
+ /** @internal */
367
+ static _defaultOptions?: SearchOptions;
368
+ /**
369
+ * Gets or sets the default search parameters for this specific engine class.
370
+ * This does not include settings from parent classes.
371
+ */
372
+ static get defaultOptions(): SearchOptions;
373
+ static set defaultOptions(options: SearchOptions);
374
+ /**
375
+ * Retrieves the combined default search options by traversing the prototype chain.
376
+ * Priority: Current class > Parent class > WebSearcher base class.
377
+ */
378
+ static getDefaultOptions(): SearchOptions;
179
379
  /**
180
380
  * Registers a search engine class.
181
381
  *
@@ -219,23 +419,25 @@ declare abstract class WebSearcher extends FetchSession {
219
419
  */
220
420
  static setAliases: (ctor: typeof WebSearcher, ...aliases: string[]) => void;
221
421
  /**
222
- * Static helper to execute a one-off search.
422
+ * Static helper to execute a one-off search or a fallback chain.
223
423
  *
224
- * It creates an instance of the specified engine, executes the search, and then
225
- * automatically disposes of the session.
424
+ * It creates an instance of the specified engine(s), executes the search, and automatically
425
+ * falls back to the next engine in the list if the current one fails or is exhausted.
226
426
  *
227
- * @param engineName - The name of the engine to use (e.g., 'Google').
427
+ * @param engineNames - The name(s) of the engine(s) to use (e.g., 'Google' or ['SearXNG', 'Google']).
228
428
  * @param query - The search query string.
229
429
  * @param options - Combined search options and fetcher options.
230
430
  * @returns A promise resolving to an array of standardized search results.
231
431
  */
232
- static search(engineName: string, query: string, options?: SearchOptions & FetcherOptions): Promise<StandardSearchResult[]>;
432
+ static search(engineNames: string | string[], query: string, options?: SearchOptions & FetcherOptions): Promise<StandardSearchResult[]>;
233
433
  /**
234
434
  * The declarative template for the fetch options.
235
435
  *
236
- * Subclasses **must** implement this getter to provide the engine configuration,
436
+ * Subclasses can implement this getter to provide the engine configuration,
237
437
  * including the base URL, search parameters pattern, and extraction rules.
238
438
  *
439
+ * This getter is **optional** if you override {@link getTemplate}.
440
+ *
239
441
  * Supports variable injection using syntax like `${query}`, `${offset}`, etc.
240
442
  *
241
443
  * @example
@@ -248,7 +450,7 @@ declare abstract class WebSearcher extends FetchSession {
248
450
  * }
249
451
  * ```
250
452
  */
251
- abstract get template(): FetcherOptions;
453
+ get template(): FetcherOptions;
252
454
  /**
253
455
  * Optional pagination configuration.
254
456
  * Defines how the searcher navigates to subsequent pages.
@@ -256,18 +458,39 @@ declare abstract class WebSearcher extends FetchSession {
256
458
  * If undefined, the searcher will only fetch the first page.
257
459
  */
258
460
  get pagination(): PaginationConfig | undefined;
461
+ /**
462
+ * Dynamically retrieves the fetch template based on current variables and search options.
463
+ *
464
+ * Subclasses can override this method to return different extraction rules (actions)
465
+ * or URL patterns based on the search category, region, or other parameters.
466
+ *
467
+ * @param variables - The calculated variables (from formatOptions, pagination, etc.).
468
+ * @param options - The original search options provided by the user.
469
+ * @returns The fetcher configuration to be used for the current request.
470
+ */
471
+ protected getTemplate(variables: Record<string, any>, options: SearchOptions): FetcherOptions;
259
472
  protected createContext(options?: FetcherOptions): _isdk_web_fetcher.FetchContext;
260
473
  /**
261
474
  * Executes a search query.
262
475
  *
263
- * This method handles the pagination loop, variable injection, fetching,
264
- * and result transformation.
476
+ * This method handles the pagination loop, multi-instance failover, variable injection,
477
+ * fetching, and result transformation.
265
478
  *
266
479
  * @param query - The search query string.
267
480
  * @param options - Optional search parameters (e.g., limit, timeRange).
268
481
  * @returns A promise resolving to an array of standardized search results.
269
482
  */
270
483
  search(query: string, options?: SearchOptions): Promise<StandardSearchResult[]>;
484
+ /**
485
+ * Hook for subclasses to validate fetched results before they are accepted.
486
+ * If this returns false, the instance manager will consider the fetch a failure
487
+ * and automatically switch to the next available baseUrl (if any).
488
+ *
489
+ * @param results - The extracted results.
490
+ * @param context - Context including the current baseUrl and page.
491
+ * @returns A promise resolving to true if valid, false otherwise.
492
+ */
493
+ protected validateFetchResult(results: StandardSearchResult[], context: SearchContext): Promise<boolean>;
271
494
  /**
272
495
  * Transform and clean the raw extracted results.
273
496
  *
@@ -347,4 +570,4 @@ declare class GoogleSearcher extends WebSearcher {
347
570
  protected transform(outputs: Record<string, any>): Promise<any[]>;
348
571
  }
349
572
 
350
- export { type CustomTimeRange, GoogleSearcher, type PaginationConfig, type SafeSearchLevel, type SearchCategory, type SearchContext, type SearchOptions, type SearchTimeRange, type SearchTimeRangePreset, type SearcherConstructor, type StandardSearchResult, WebSearcher };
573
+ export { type CustomTimeRange, type ExtractOptions, type FetchExtractorOptions, GoogleSearcher, type HtmlData, type MetadataResult, type MetadataType, type PaginationConfig, type SafeSearchLevel, type SearchCategory, type SearchContext, type SearchOptions, type SearchTimeRange, type SearchTimeRangePreset, type SearcherConstructor, type StandardSearchResult, type VerifiedUrl, WebSearcher, extractDate, extractMetadataFrom, fetchHeaders, fetchPartial, normalizeDate, parseHeaders, parseHtml, testUrlsByLatency };
package/dist/index.d.ts CHANGED
@@ -1,5 +1,6 @@
1
1
  import * as _isdk_web_fetcher from '@isdk/web-fetcher';
2
2
  import { FetcherOptions, FetchSession } from '@isdk/web-fetcher';
3
+ export { FetcherOptions } from '@isdk/web-fetcher';
3
4
  import { IBaseFactoryOptions } from 'custom-factory';
4
5
 
5
6
  /**
@@ -83,6 +84,12 @@ interface SearchContext {
83
84
  page: number;
84
85
  /** The requested limit of results. */
85
86
  limit?: number;
87
+ /** Allows for custom variables passed via search options. */
88
+ [key: string]: any;
89
+ /** The baseUrl used for this specific fetch (if multi-instance is enabled) */
90
+ baseUrl?: string;
91
+ /** The name of the engine executing the search */
92
+ engine?: string;
86
93
  }
87
94
  type SearchTimeRangePreset = 'all' | 'hour' | 'day' | 'week' | 'month' | 'year';
88
95
  interface CustomTimeRange {
@@ -92,7 +99,7 @@ interface CustomTimeRange {
92
99
  to?: Date | string;
93
100
  }
94
101
  type SearchTimeRange = SearchTimeRangePreset | CustomTimeRange;
95
- type SearchCategory = 'all' | 'images' | 'videos' | 'news';
102
+ type SearchCategory = 'all' | 'images' | 'videos' | 'news' | string;
96
103
  type SafeSearchLevel = 'off' | 'moderate' | 'strict';
97
104
  /**
98
105
  * Options provided when executing a search.
@@ -139,12 +146,188 @@ interface SearchOptions {
139
146
  transform?: (results: StandardSearchResult[], context: SearchContext) => Promise<StandardSearchResult[]> | StandardSearchResult[];
140
147
  /** Any other custom variables to be injected into the template. */
141
148
  [key: string]: any;
149
+ /**
150
+ * Allows the user to dynamically specify or override the base URLs for the engines.
151
+ * Can be an array of URLs for a single engine, or a map of engine names to URL arrays.
152
+ */
153
+ baseUrls?: string[] | Record<string, string[]>;
154
+ /**
155
+ * User-defined callback to validate the fetched results for a page.
156
+ * If it returns false, the fetch is considered a failure, triggering the retry/failover mechanism.
157
+ */
158
+ validator?: (results: StandardSearchResult[], context: SearchContext) => boolean | Promise<boolean>;
159
+ /**
160
+ * If true (default), the searcher will attempt to fulfill the requested `limit`
161
+ * by falling back to subsequent engines in the chain if previous ones are exhausted.
162
+ * If false, it will stop after the first successful engine regardless of whether
163
+ * the limit was reached.
164
+ */
165
+ fillLimit?: boolean;
166
+ /**
167
+ * Specifies which page index to start the search from.
168
+ * Useful when delegating pagination across different sessions.
169
+ * @default 0
170
+ */
171
+ startPage?: number;
172
+ }
173
+
174
+ /**
175
+ * Options for network requests.
176
+ */
177
+ interface FetchExtractorOptions {
178
+ /** Timeout in milliseconds. Defaults vary by function (5s to 10s). */
179
+ timeout?: number;
180
+ /** Custom HTTP headers to include in the request. */
181
+ headers?: Record<string, string>;
182
+ }
183
+ /**
184
+ * Fetches only the HTTP headers for a given URL using a HEAD request.
185
+ * Useful for checking 'last-modified' without downloading the body.
186
+ *
187
+ * @param url - The URL to check.
188
+ * @param options - Request options.
189
+ * @returns The Headers object, or null on failure.
190
+ */
191
+ declare function fetchHeaders(url: string, options?: FetchExtractorOptions): Promise<Headers | null>;
192
+ /**
193
+ * Fetches a partial amount of content from a URL.
194
+ * Automatically handles character set detection from the Content-Type header.
195
+ * Aborts the request once the specified maxBytes is reached.
196
+ *
197
+ * @param url - The URL to fetch.
198
+ * @param maxBytes - The maximum number of bytes to read. Defaults to 32KB.
199
+ * @param options - Request options.
200
+ * @returns An object containing the decoded content string and the response headers, or null if no result could be produced.
201
+ */
202
+ declare function fetchPartial(url: string, maxBytes?: number, options?: FetchExtractorOptions): Promise<{
203
+ content: string;
204
+ headers: Headers;
205
+ } | null>;
206
+
207
+ /**
208
+ * Represents structured data extracted from an HTML document.
209
+ */
210
+ interface HtmlData {
211
+ /** Map of meta tag names/properties to their content. Keys are lowercase. */
212
+ meta: Record<string, string>;
213
+ /** Array of parsed JSON-LD objects found in the document. */
214
+ jsonLd: any[];
215
+ /** Array of data from HTML <time> tags. */
216
+ time: Array<{
217
+ /** The value of the 'datetime' attribute, if present. */
218
+ datetime: string | null;
219
+ /** The text content within the <time> tag, with HTML stripped. */
220
+ text: string;
221
+ }>;
142
222
  }
223
+ /**
224
+ * Converts a Web API Headers object into a plain JavaScript record.
225
+ * All header names are converted to lowercase for consistent access.
226
+ *
227
+ * @param headers - The Headers object to parse.
228
+ * @returns A record where keys are lowercase header names.
229
+ */
230
+ declare function parseHeaders(headers: Headers): Record<string, string>;
231
+ /**
232
+
233
+ * Parses an HTML string to extract generic metadata structures (Meta tags, JSON-LD, Time tags).
234
+
235
+ * This function does not perform field-specific logic (like finding a date); it simply
236
+
237
+ * collects available structured data.
238
+
239
+ *
240
+
241
+ * @param html - The raw HTML content to parse.
242
+
243
+ * @returns An object containing grouped metadata from the HTML.
244
+
245
+ */
246
+ declare function parseHtml(html: string): HtmlData;
247
+
248
+ /**
249
+ * Result object for generic metadata extraction.
250
+ */
251
+ interface MetadataResult {
252
+ /** The extracted and normalized date, if any. */
253
+ date?: string | null;
254
+ /** Index signature reserved for additional metadata fields that may be added in the future. */
255
+ [key: string]: any;
256
+ }
257
+ /**
258
+ * Supported metadata types for extraction.
259
+ */
260
+ type MetadataType = 'date' | string;
261
+ /**
262
+ * Extracts specific metadata from parsed HTML and headers based on a requested type.
263
+ * Currently supports 'date' extraction with a prioritized fallback mechanism.
264
+ *
265
+ * @param result - An object containing the raw HTML content and response headers.
266
+ * @param type - The type of metadata to extract.
267
+ * @returns The extracted and normalized value, or null if not found.
268
+ */
269
+ declare function extractMetadataFrom(result: {
270
+ content: string;
271
+ headers: Headers;
272
+ }, type: MetadataType): string | null;
273
+
274
+ /**
275
+ * Normalizes a date string into a standard ISO 8601 format (UTC).
276
+ * It handles various formats (YYYY-MM-DD, RFC2822, etc.) and performs
277
+ * aggressive cleaning and sanity checks.
278
+ *
279
+ * @param dateStr - The raw date string to normalize.
280
+ * @returns An ISO 8601 string (e.g., "2024-01-20T00:00:00.000Z") or null if invalid.
281
+ */
282
+ declare function normalizeDate(dateStr: string | null): string | null;
283
+
284
+ /**
285
+ * Options for the extractDate function.
286
+ */
287
+ interface ExtractOptions extends FetchExtractorOptions {
288
+ /**
289
+ * Maximum number of bytes to download from the URL.
290
+ * Defaults to 32768 (32KB), which is usually enough for the HTML <head>.
291
+ */
292
+ maxBytes?: number;
293
+ }
294
+ /**
295
+ * High-level convenience function to extract the publication or modification date from a URL.
296
+ * It performs a partial fetch of the content and applies multiple extraction rules
297
+ * (LD+JSON, Meta tags, Time tags, Headers) to find the most reliable date.
298
+ *
299
+ * @param url - The web page URL to analyze.
300
+ * @param options - Fetch and extraction options.
301
+ * @returns An ISO 8601 date string, or null if no valid date could be found.
302
+ *
303
+ * @example
304
+ * ```ts
305
+ * const date = await extractDate('https://example.com/article');
306
+ * console.log(date); // "2024-01-20T12:00:00.000Z"
307
+ * ```
308
+ */
309
+ declare function extractDate(url: string, options?: ExtractOptions): Promise<string | null>;
310
+
311
+ interface VerifiedUrl {
312
+ url: string;
313
+ latency: number;
314
+ }
315
+ /**
316
+ * A general utility to test a list of URLs for availability and latency.
317
+ * Returns a list of verified URLs sorted by response time.
318
+ */
319
+ declare function testUrlsByLatency(urls: string[], options?: {
320
+ timeout?: number;
321
+ limit?: number;
322
+ testPath?: string;
323
+ proxy?: string;
324
+ }): Promise<VerifiedUrl[]>;
143
325
 
144
326
  /**
145
327
  * Constructor definition for Searcher subclasses.
146
328
  */
147
329
  type SearcherConstructor = new (options?: FetcherOptions) => WebSearcher;
330
+
148
331
  /**
149
332
  * The abstract base class for all search engines.
150
333
  *
@@ -176,6 +359,23 @@ declare abstract class WebSearcher extends FetchSession {
176
359
  * Useful for registering shorthand names (e.g., 'g' for 'Google').
177
360
  */
178
361
  static alias?: string | string[];
362
+ /** Default base URLs for engines that support multiple instances. */
363
+ static defaultBaseUrls?: string[];
364
+ /** Globally shared index for tracking the currently active instance (node) across sessions. */
365
+ static currentInstanceIndex?: number;
366
+ /** @internal */
367
+ static _defaultOptions?: SearchOptions;
368
+ /**
369
+ * Gets or sets the default search parameters for this specific engine class.
370
+ * This does not include settings from parent classes.
371
+ */
372
+ static get defaultOptions(): SearchOptions;
373
+ static set defaultOptions(options: SearchOptions);
374
+ /**
375
+ * Retrieves the combined default search options by traversing the prototype chain.
376
+ * Priority: Current class > Parent class > WebSearcher base class.
377
+ */
378
+ static getDefaultOptions(): SearchOptions;
179
379
  /**
180
380
  * Registers a search engine class.
181
381
  *
@@ -219,23 +419,25 @@ declare abstract class WebSearcher extends FetchSession {
219
419
  */
220
420
  static setAliases: (ctor: typeof WebSearcher, ...aliases: string[]) => void;
221
421
  /**
222
- * Static helper to execute a one-off search.
422
+ * Static helper to execute a one-off search or a fallback chain.
223
423
  *
224
- * It creates an instance of the specified engine, executes the search, and then
225
- * automatically disposes of the session.
424
+ * It creates an instance of the specified engine(s), executes the search, and automatically
425
+ * falls back to the next engine in the list if the current one fails or is exhausted.
226
426
  *
227
- * @param engineName - The name of the engine to use (e.g., 'Google').
427
+ * @param engineNames - The name(s) of the engine(s) to use (e.g., 'Google' or ['SearXNG', 'Google']).
228
428
  * @param query - The search query string.
229
429
  * @param options - Combined search options and fetcher options.
230
430
  * @returns A promise resolving to an array of standardized search results.
231
431
  */
232
- static search(engineName: string, query: string, options?: SearchOptions & FetcherOptions): Promise<StandardSearchResult[]>;
432
+ static search(engineNames: string | string[], query: string, options?: SearchOptions & FetcherOptions): Promise<StandardSearchResult[]>;
233
433
  /**
234
434
  * The declarative template for the fetch options.
235
435
  *
236
- * Subclasses **must** implement this getter to provide the engine configuration,
436
+ * Subclasses can implement this getter to provide the engine configuration,
237
437
  * including the base URL, search parameters pattern, and extraction rules.
238
438
  *
439
+ * This getter is **optional** if you override {@link getTemplate}.
440
+ *
239
441
  * Supports variable injection using syntax like `${query}`, `${offset}`, etc.
240
442
  *
241
443
  * @example
@@ -248,7 +450,7 @@ declare abstract class WebSearcher extends FetchSession {
248
450
  * }
249
451
  * ```
250
452
  */
251
- abstract get template(): FetcherOptions;
453
+ get template(): FetcherOptions;
252
454
  /**
253
455
  * Optional pagination configuration.
254
456
  * Defines how the searcher navigates to subsequent pages.
@@ -256,18 +458,39 @@ declare abstract class WebSearcher extends FetchSession {
256
458
  * If undefined, the searcher will only fetch the first page.
257
459
  */
258
460
  get pagination(): PaginationConfig | undefined;
461
+ /**
462
+ * Dynamically retrieves the fetch template based on current variables and search options.
463
+ *
464
+ * Subclasses can override this method to return different extraction rules (actions)
465
+ * or URL patterns based on the search category, region, or other parameters.
466
+ *
467
+ * @param variables - The calculated variables (from formatOptions, pagination, etc.).
468
+ * @param options - The original search options provided by the user.
469
+ * @returns The fetcher configuration to be used for the current request.
470
+ */
471
+ protected getTemplate(variables: Record<string, any>, options: SearchOptions): FetcherOptions;
259
472
  protected createContext(options?: FetcherOptions): _isdk_web_fetcher.FetchContext;
260
473
  /**
261
474
  * Executes a search query.
262
475
  *
263
- * This method handles the pagination loop, variable injection, fetching,
264
- * and result transformation.
476
+ * This method handles the pagination loop, multi-instance failover, variable injection,
477
+ * fetching, and result transformation.
265
478
  *
266
479
  * @param query - The search query string.
267
480
  * @param options - Optional search parameters (e.g., limit, timeRange).
268
481
  * @returns A promise resolving to an array of standardized search results.
269
482
  */
270
483
  search(query: string, options?: SearchOptions): Promise<StandardSearchResult[]>;
484
+ /**
485
+ * Hook for subclasses to validate fetched results before they are accepted.
486
+ * If this returns false, the instance manager will consider the fetch a failure
487
+ * and automatically switch to the next available baseUrl (if any).
488
+ *
489
+ * @param results - The extracted results.
490
+ * @param context - Context including the current baseUrl and page.
491
+ * @returns A promise resolving to true if valid, false otherwise.
492
+ */
493
+ protected validateFetchResult(results: StandardSearchResult[], context: SearchContext): Promise<boolean>;
271
494
  /**
272
495
  * Transform and clean the raw extracted results.
273
496
  *
@@ -347,4 +570,4 @@ declare class GoogleSearcher extends WebSearcher {
347
570
  protected transform(outputs: Record<string, any>): Promise<any[]>;
348
571
  }
349
572
 
350
- export { type CustomTimeRange, GoogleSearcher, type PaginationConfig, type SafeSearchLevel, type SearchCategory, type SearchContext, type SearchOptions, type SearchTimeRange, type SearchTimeRangePreset, type SearcherConstructor, type StandardSearchResult, WebSearcher };
573
+ export { type CustomTimeRange, type ExtractOptions, type FetchExtractorOptions, GoogleSearcher, type HtmlData, type MetadataResult, type MetadataType, type PaginationConfig, type SafeSearchLevel, type SearchCategory, type SearchContext, type SearchOptions, type SearchTimeRange, type SearchTimeRangePreset, type SearcherConstructor, type StandardSearchResult, type VerifiedUrl, WebSearcher, extractDate, extractMetadataFrom, fetchHeaders, fetchPartial, normalizeDate, parseHeaders, parseHtml, testUrlsByLatency };