@dpopsuev/web-spider 0.10.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/batch.d.ts +24 -0
- package/dist/batch.d.ts.map +1 -0
- package/dist/batch.js +68 -0
- package/dist/cache.d.ts +40 -0
- package/dist/cache.d.ts.map +1 -0
- package/dist/cache.js +78 -0
- package/dist/convert.d.ts +29 -0
- package/dist/convert.d.ts.map +1 -0
- package/dist/convert.js +131 -0
- package/dist/crawl.d.ts +56 -0
- package/dist/crawl.d.ts.map +1 -0
- package/dist/crawl.js +126 -0
- package/dist/disk-cache.d.ts +75 -0
- package/dist/disk-cache.d.ts.map +1 -0
- package/dist/disk-cache.js +185 -0
- package/dist/graph.d.ts +76 -0
- package/dist/graph.d.ts.map +1 -0
- package/dist/graph.js +156 -0
- package/dist/index.d.ts +45 -0
- package/dist/index.d.ts.map +1 -0
- package/dist/index.js +44 -0
- package/dist/parse.d.ts +27 -0
- package/dist/parse.d.ts.map +1 -0
- package/dist/parse.js +131 -0
- package/dist/playwright.d.ts +75 -0
- package/dist/playwright.d.ts.map +1 -0
- package/dist/playwright.js +141 -0
- package/dist/ports.d.ts +104 -0
- package/dist/ports.d.ts.map +1 -0
- package/dist/ports.js +10 -0
- package/dist/robots.d.ts +24 -0
- package/dist/robots.d.ts.map +1 -0
- package/dist/robots.js +104 -0
- package/dist/search.d.ts +47 -0
- package/dist/search.d.ts.map +1 -0
- package/dist/search.js +112 -0
- package/dist/sitemap.d.ts +15 -0
- package/dist/sitemap.d.ts.map +1 -0
- package/dist/sitemap.js +65 -0
- package/dist/spider.d.ts +74 -0
- package/dist/spider.d.ts.map +1 -0
- package/dist/spider.js +349 -0
- package/dist/throttle.d.ts +49 -0
- package/dist/throttle.d.ts.map +1 -0
- package/dist/throttle.js +85 -0
- package/dist/tree.d.ts +34 -0
- package/dist/tree.d.ts.map +1 -0
- package/dist/tree.js +354 -0
- package/dist/types.d.ts +189 -0
- package/dist/types.d.ts.map +1 -0
- package/dist/types.js +2 -0
- package/dist/views.d.ts +17 -0
- package/dist/views.d.ts.map +1 -0
- package/dist/views.js +39 -0
- package/dist/web-search.d.ts +184 -0
- package/dist/web-search.d.ts.map +1 -0
- package/dist/web-search.js +399 -0
- package/fixtures/article-with-images.html +94 -0
- package/fixtures/gh-shell.html +32 -0
- package/fixtures/guide-ai-agents-web-scraping.json +552 -0
- package/fixtures/images/large.jpg +0 -0
- package/fixtures/images/small.jpg +0 -0
- package/fixtures/images/tiny.png +0 -0
- package/fixtures/quotes-index.json +40 -0
- package/package.json +47 -0
- package/scripts/fetch-guide.mjs +25 -0
- package/src/cache.ts +99 -0
- package/src/convert.ts +161 -0
- package/src/crawl.ts +186 -0
- package/src/disk-cache.ts +228 -0
- package/src/graph.ts +189 -0
- package/src/index.ts +74 -0
- package/src/parse.ts +154 -0
- package/src/playwright.ts +193 -0
- package/src/ports.ts +131 -0
- package/src/robots.ts +121 -0
- package/src/search.ts +173 -0
- package/src/sitemap.ts +67 -0
- package/src/spider.ts +475 -0
- package/src/throttle.ts +118 -0
- package/src/tree.ts +379 -0
- package/src/types.ts +225 -0
- package/src/views.ts +42 -0
- package/src/web-search.ts +548 -0
- package/test/convert-images.test.ts +69 -0
- package/test/disk-cache-images.test.ts +193 -0
- package/test/engine-registry.test.ts +114 -0
- package/test/exports.test.ts +124 -0
- package/test/get-chunk.test.ts +115 -0
- package/test/images-integration.test.ts +359 -0
- package/test/improvements.test.ts +279 -0
- package/test/inbound-count.test.ts +111 -0
- package/test/lean.test.ts +105 -0
- package/test/playwright.test.ts +128 -0
- package/test/ports.test.ts +161 -0
- package/test/search.test.ts +219 -0
- package/test/spider-images.test.ts +180 -0
- package/test/spider-unit.test.ts +610 -0
- package/test/tree.test.ts +272 -0
- package/test/types.test.ts +169 -0
- package/test/web-search-integration.test.ts +180 -0
- package/test/web-search.test.ts +305 -0
- package/tsconfig.json +9 -0
- package/tsconfig.test.json +7 -0
- package/vitest.config.ts +8 -0
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"views.d.ts","sourceRoot":"","sources":["../src/views.ts"],"names":[],"mappings":"AAAA;;;;GAIG;AAEH,OAAO,KAAK,EAAE,SAAS,EAAE,MAAM,YAAY,CAAC;AAC5C,OAAO,KAAK,EAAE,QAAQ,EAAE,YAAY,EAAE,MAAM,YAAY,CAAC;AAEzD;;;;;;;GAOG;AACH,wBAAgB,MAAM,CAAC,IAAI,EAAE,YAAY,EAAE,KAAK,CAAC,EAAE,SAAS,GAAG,QAAQ,CAwBtE"}
|
package/dist/views.js
ADDED
|
@@ -0,0 +1,39 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* View transformations — business logic that converts a SpideredPage into
|
|
3
|
+
* one of the available view shapes. Separated from types.ts which is pure
|
|
4
|
+
* data-shape definitions.
|
|
5
|
+
*/
|
|
6
|
+
/**
 * Project a full SpideredPage onto the compact LeanPage shape.
 *
 * `canonicalUrl` is copied whenever it is not `undefined`; `description`,
 * `author` and `publishedAt` are copied only when truthy. Headings are
 * rendered as markdown-style strings ("## Title") and links are reduced to
 * the first 10 whose `rel` is "body", keeping only `href` and `text`.
 *
 * @param page  The spidered page to downgrade.
 * @param graph Optional PageGraph. When supplied, `inboundCount` is set to
 *              the length of `graph.inbound(page.url)` so callers get a cheap
 *              authority signal without a full PageRank pass.
 * @returns The lean view object (`view: "lean"`).
 */
export function toLean(page, graph) {
    const renderHeading = (h) => `${"#".repeat(h.level)} ${h.text}`;
    const isBodyLink = (link) => link.rel === "body";
    const toLinkRef = (link) => ({ href: link.href, text: link.text });

    // Properties are assigned in the same order the lean shape lists them so
    // serialised output keeps a stable key order.
    const lean = { view: "lean", url: page.url, domain: page.domain };
    if (page.canonicalUrl !== undefined) {
        lean.canonicalUrl = page.canonicalUrl;
    }
    lean.title = page.title;
    for (const field of ["description", "author", "publishedAt"]) {
        if (page[field]) {
            lean[field] = page[field];
        }
    }
    lean.lang = page.lang;
    lean.tags = page.tags;
    lean.wordCount = page.wordCount;
    lean.readingTimeMinutes = page.readingTimeMinutes;
    lean.chunkCount = page.chunks.length;
    lean.headings = page.headings.map(renderHeading);
    lean.links = page.links.filter(isBodyLink).slice(0, 10).map(toLinkRef);
    if (graph !== undefined) {
        lean.inboundCount = graph.inbound(page.url).length;
    }
    return lean;
}
|
|
39
|
+
//# sourceMappingURL=views.js.map
|
|
@@ -0,0 +1,184 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Web search API integration — Brave Search, Tavily, Exa and DuckDuckGo.
|
|
3
|
+
*
|
|
4
|
+
* All engines return a normalised WebSearchResult[].
|
|
5
|
+
* API keys are read from environment variables by default:
|
|
6
|
+
* BRAVE_SEARCH_API_KEY
|
|
7
|
+
* TAVILY_API_KEY / EXA_API_KEY (the DuckDuckGo engine needs no key)
|
|
8
|
+
*/
|
|
9
|
+
export type { WebSearchResult } from "./ports.js";
|
|
10
|
+
import type { ISearchEngine, SearchQuery, WebSearchResult } from "./ports.js";
|
|
11
|
+
export interface BraveSearchOptions {
|
|
12
|
+
/** API key. Defaults to process.env.BRAVE_SEARCH_API_KEY. */
|
|
13
|
+
apiKey?: string;
|
|
14
|
+
/** Number of results (1–20). Default 10. */
|
|
15
|
+
numResults?: number;
|
|
16
|
+
/** ISO 3166-1 alpha-2 country code for localised results, e.g. "US". */
|
|
17
|
+
country?: string;
|
|
18
|
+
/**
|
|
19
|
+
* Freshness filter. Maps SearchQuery.timeRange to Brave's parameter:
|
|
20
|
+
* "pd" = past day, "pw" = past week, "pm" = past month, "py" = past year.
|
|
21
|
+
* Pass directly when bypassing the adapter, or set timeRange on SearchQuery.
|
|
22
|
+
*/
|
|
23
|
+
freshness?: "pd" | "pw" | "pm" | "py";
|
|
24
|
+
}
|
|
25
|
+
export interface TavilySearchOptions {
|
|
26
|
+
/** API key. Defaults to process.env.TAVILY_API_KEY. */
|
|
27
|
+
apiKey?: string;
|
|
28
|
+
/** Number of results. Default 5. */
|
|
29
|
+
numResults?: number;
|
|
30
|
+
/** "basic" (1 credit) or "advanced" (2 credits). Default "basic". */
|
|
31
|
+
depth?: "basic" | "advanced";
|
|
32
|
+
/** Restrict results to content published within this window. */
|
|
33
|
+
timeRange?: "day" | "week" | "month" | "year";
|
|
34
|
+
/** Topic mode: "news" prioritises fresh news articles. */
|
|
35
|
+
topic?: "news" | "general";
|
|
36
|
+
}
|
|
37
|
+
export type SearchEngine = "brave" | "tavily" | "exa" | "ddg";
|
|
38
|
+
export interface ExaSearchOptions {
|
|
39
|
+
/** API key. Defaults to process.env.EXA_API_KEY. */
|
|
40
|
+
apiKey?: string;
|
|
41
|
+
/** Number of results. Default 10. */
|
|
42
|
+
numResults?: number;
|
|
43
|
+
/**
|
|
44
|
+
* Search type.
|
|
45
|
+
* "auto" — Exa decides keyword vs neural (default).
|
|
46
|
+
* "neural" — embedding-based semantic search.
|
|
47
|
+
* "keyword" — traditional keyword search.
|
|
48
|
+
*/
|
|
49
|
+
type?: "auto" | "neural" | "keyword";
|
|
50
|
+
}
|
|
51
|
+
/**
|
|
52
|
+
* Search the web via the Exa Search API (neural/semantic retrieval).
|
|
53
|
+
* https://exa.ai/docs/reference/search
|
|
54
|
+
*
|
|
55
|
+
* Returns highlights inline per result — richer snippets without extra round-trips.
|
|
56
|
+
*/
|
|
57
|
+
export declare function exaSearch(query: string, opts?: ExaSearchOptions): Promise<WebSearchResult[]>;
|
|
58
|
+
/**
|
|
59
|
+
* Search the web via the Brave Search API.
|
|
60
|
+
* https://api.search.brave.com/app/documentation/web-search
|
|
61
|
+
*/
|
|
62
|
+
export declare function braveSearch(query: string, opts?: BraveSearchOptions): Promise<WebSearchResult[]>;
|
|
63
|
+
/**
|
|
64
|
+
* Search the web via the Tavily API.
|
|
65
|
+
* https://docs.tavily.com/docs/rest-api/api-reference
|
|
66
|
+
*/
|
|
67
|
+
export declare function tavilySearch(query: string, opts?: TavilySearchOptions): Promise<WebSearchResult[]>;
|
|
68
|
+
export interface DdgSearchOptions {
|
|
69
|
+
/**
|
|
70
|
+
* Maximum results to return. DDG doesn't support a server-side count param;
|
|
71
|
+
* this slices the client-side result list. Default: 10.
|
|
72
|
+
*/
|
|
73
|
+
numResults?: number;
|
|
74
|
+
}
|
|
75
|
+
/**
|
|
76
|
+
* Search via the DuckDuckGo Instant Answer API.
|
|
77
|
+
* https://duckduckgo.com/api
|
|
78
|
+
*
|
|
79
|
+
* No API key required. Returns structured instant answers (Abstract,
|
|
80
|
+
* Results, RelatedTopics) mapped to WebSearchResult[].
|
|
81
|
+
*
|
|
82
|
+
* Limitation: not a full web index — best for well-known entities and
|
|
83
|
+
* unambiguous queries. Returns empty when DDG has no instant answer.
|
|
84
|
+
*/
|
|
85
|
+
export declare function ddgSearch(query: string, opts?: DdgSearchOptions): Promise<WebSearchResult[]>;
|
|
86
|
+
/**
|
|
87
|
+
* Search using whichever engine is explicitly requested or has an API key
|
|
88
|
+
* available. Falls through to the DDG Instant Answer API as a zero-cost
|
|
89
|
+
* last resort — no key required.
|
|
90
|
+
*
|
|
91
|
+
* Prefer {@link defaultSearchEngine} + {@link FallbackSearchEngine} when
|
|
92
|
+
* you need composable retry / fallback behaviour.
|
|
93
|
+
*/
|
|
94
|
+
export declare function webSearch(query: string, opts?: {
|
|
95
|
+
engine?: SearchEngine;
|
|
96
|
+
numResults?: number;
|
|
97
|
+
timeRange?: "day" | "week" | "month" | "year";
|
|
98
|
+
topic?: "news" | "general";
|
|
99
|
+
}): Promise<WebSearchResult[]>;
|
|
100
|
+
/**
|
|
101
|
+
* A factory that creates an ISearchEngine from an optional API key.
|
|
102
|
+
* key is undefined for keyless engines (e.g. DDG).
|
|
103
|
+
*/
|
|
104
|
+
type EngineFactory = (key: string | undefined) => ISearchEngine;
|
|
105
|
+
/**
|
|
106
|
+
* Register a search engine under a name.
|
|
107
|
+
*
|
|
108
|
+
* Call this to add a new engine without touching any existing code:
|
|
109
|
+
* @example
|
|
110
|
+
* registerSearchEngine("my-engine", (key) => new MyEngine(key!))
|
|
111
|
+
*/
|
|
112
|
+
export declare function registerSearchEngine(name: string, factory: EngineFactory): void;
|
|
113
|
+
/**
|
|
114
|
+
* Resolve a registered engine by name, passing the provided API key.
|
|
115
|
+
* Throws a descriptive error for unknown names or missing required keys.
|
|
116
|
+
*/
|
|
117
|
+
export declare function resolveSearchEngine(name: string, key?: string | undefined): ISearchEngine;
|
|
118
|
+
/** Brave Search adapter implementing ISearchEngine. */
|
|
119
|
+
export declare class BraveSearchEngine implements ISearchEngine {
|
|
120
|
+
private readonly apiKey;
|
|
121
|
+
private readonly country?;
|
|
122
|
+
constructor(apiKey: string, country?: string | undefined);
|
|
123
|
+
search(req: SearchQuery): Promise<WebSearchResult[]>;
|
|
124
|
+
}
|
|
125
|
+
/** Tavily adapter implementing ISearchEngine. */
|
|
126
|
+
export declare class TavilySearchEngine implements ISearchEngine {
|
|
127
|
+
private readonly apiKey;
|
|
128
|
+
constructor(apiKey: string);
|
|
129
|
+
search(req: SearchQuery): Promise<WebSearchResult[]>;
|
|
130
|
+
}
|
|
131
|
+
/** Exa adapter implementing ISearchEngine. */
|
|
132
|
+
export declare class ExaSearchEngine implements ISearchEngine {
|
|
133
|
+
private readonly apiKey;
|
|
134
|
+
constructor(apiKey: string);
|
|
135
|
+
search(req: SearchQuery): Promise<WebSearchResult[]>;
|
|
136
|
+
}
|
|
137
|
+
/** DuckDuckGo Instant Answer adapter — no API key required. */
|
|
138
|
+
export declare class DdgSearchEngine implements ISearchEngine {
|
|
139
|
+
search(req: SearchQuery): Promise<WebSearchResult[]>;
|
|
140
|
+
}
|
|
141
|
+
export interface FallbackSearchEngineOptions {
|
|
142
|
+
/**
|
|
143
|
+
* Treat an empty result set as a failure and try the next engine.
|
|
144
|
+
* Default: true.
|
|
145
|
+
*/
|
|
146
|
+
fallbackOnEmpty?: boolean;
|
|
147
|
+
/**
|
|
148
|
+
* Swallow a thrown error and try the next engine instead of propagating.
|
|
149
|
+
* Default: true.
|
|
150
|
+
*/
|
|
151
|
+
fallbackOnError?: boolean;
|
|
152
|
+
}
|
|
153
|
+
/**
|
|
154
|
+
* A composite ISearchEngine that tries each engine in order, falling back
|
|
155
|
+
* to the next when the current one returns empty results or throws.
|
|
156
|
+
*
|
|
157
|
+
* Because it implements ISearchEngine itself it is fully composable —
|
|
158
|
+
* nest FallbackSearchEngines, wrap them in caches, inject stubs in tests.
|
|
159
|
+
*
|
|
160
|
+
* @example
|
|
161
|
+
* // Tavily with DDG as zero-cost fallback
|
|
162
|
+
* const engine = new FallbackSearchEngine([
|
|
163
|
+
* new TavilySearchEngine(process.env.TAVILY_API_KEY),
|
|
164
|
+
* new DdgSearchEngine(),
|
|
165
|
+
* ]);
|
|
166
|
+
*/
|
|
167
|
+
export declare class FallbackSearchEngine implements ISearchEngine {
|
|
168
|
+
private readonly engines;
|
|
169
|
+
private readonly fallbackOnEmpty;
|
|
170
|
+
private readonly fallbackOnError;
|
|
171
|
+
constructor(engines: ISearchEngine[], opts?: FallbackSearchEngineOptions);
|
|
172
|
+
search(req: SearchQuery): Promise<WebSearchResult[]>;
|
|
173
|
+
}
|
|
174
|
+
/**
|
|
175
|
+
* Build a FallbackSearchEngine chain from environment variables.
|
|
176
|
+
*
|
|
177
|
+
* Priority order for keyed engines: Brave → Tavily → Exa.
|
|
178
|
+
* DuckDuckGo is always appended as the zero-cost last resort.
|
|
179
|
+
*
|
|
180
|
+
* The returned engine implements ISearchEngine — swap it for any stub
|
|
181
|
+
* in tests without touching call sites.
|
|
182
|
+
*/
|
|
183
|
+
export declare function defaultSearchEngine(): ISearchEngine;
|
|
184
|
+
//# sourceMappingURL=web-search.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"web-search.d.ts","sourceRoot":"","sources":["../src/web-search.ts"],"names":[],"mappings":"AAAA;;;;;;;GAOG;AAIH,YAAY,EAAE,eAAe,EAAE,MAAM,YAAY,CAAC;AAClD,OAAO,KAAK,EAAE,aAAa,EAAE,WAAW,EAAE,eAAe,EAAE,MAAM,YAAY,CAAC;AAE9E,MAAM,WAAW,kBAAkB;IAClC,6DAA6D;IAC7D,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,4CAA4C;IAC5C,UAAU,CAAC,EAAE,MAAM,CAAC;IACpB,wEAAwE;IACxE,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB;;;;OAIG;IACH,SAAS,CAAC,EAAE,IAAI,GAAG,IAAI,GAAG,IAAI,GAAG,IAAI,CAAC;CACtC;AAED,MAAM,WAAW,mBAAmB;IACnC,uDAAuD;IACvD,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,oCAAoC;IACpC,UAAU,CAAC,EAAE,MAAM,CAAC;IACpB,qEAAqE;IACrE,KAAK,CAAC,EAAE,OAAO,GAAG,UAAU,CAAC;IAC7B,gEAAgE;IAChE,SAAS,CAAC,EAAE,KAAK,GAAG,MAAM,GAAG,OAAO,GAAG,MAAM,CAAC;IAC9C,0DAA0D;IAC1D,KAAK,CAAC,EAAE,MAAM,GAAG,SAAS,CAAC;CAC3B;AAED,MAAM,MAAM,YAAY,GAAG,OAAO,GAAG,QAAQ,GAAG,KAAK,GAAG,KAAK,CAAC;AAE9D,MAAM,WAAW,gBAAgB;IAChC,oDAAoD;IACpD,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,qCAAqC;IACrC,UAAU,CAAC,EAAE,MAAM,CAAC;IACpB;;;;;OAKG;IACH,IAAI,CAAC,EAAE,MAAM,GAAG,QAAQ,GAAG,SAAS,CAAC;CACrC;AAED;;;;;GAKG;AACH,wBAAsB,SAAS,CAAC,KAAK,EAAE,MAAM,EAAE,IAAI,GAAE,gBAAqB,GAAG,OAAO,CAAC,eAAe,EAAE,CAAC,CA6CtG;AAED;;;GAGG;AACH,wBAAsB,WAAW,CAAC,KAAK,EAAE,MAAM,EAAE,IAAI,GAAE,kBAAuB,GAAG,OAAO,CAAC,eAAe,EAAE,CAAC,CA8C1G;AAED;;;GAGG;AACH,wBAAsB,YAAY,CAAC,KAAK,EAAE,MAAM,EAAE,IAAI,GAAE,mBAAwB,GAAG,OAAO,CAAC,eAAe,EAAE,CAAC,CA2C5G;AAMD,MAAM,WAAW,gBAAgB;IAChC;;;OAGG;IACH,UAAU,CAAC,EAAE,MAAM,CAAC;CACpB;AAED;;;;;;;;;GASG;AACH,wBAAsB,SAAS,CAAC,KAAK,EAAE,MAAM,EAAE,IAAI,GAAE,gBAAqB,GAAG,OAAO,CAAC,eAAe,EAAE,CAAC,CAwEtG;AAED;;;;;;;GAOG;AACH,wBAAsB,SAAS,CAC9B,KAAK,EAAE,MAAM,EACb,IAAI,GAAE;IACL,MAAM,CAAC,EAAE,YAAY,CAAC;IACtB,UAAU,CAAC,EAAE,MAAM,CAAC;IACpB,SAAS,CAAC,EAAE,KAAK,GAAG,MAAM,GAAG,OAAO,GAAG,MAAM,CAAC;IAC9C,KAAK,CAAC,EAAE,MAAM,GAAG,SAAS,CAAC;CACtB,GACJ,OAAO,CAAC,eAAe,EAAE,CAAC,CAU5B;AAMD;;;GAGG;AACH,KAAK,aAAa,GAAG,CAAC,GAAG,EAAE,MAAM,GAAG,SAAS,KAAK,aAAa,CAAC;AAKhE;;;;;;GAMG;AACH,wBAAgB,oBAAoB,CAAC,IAAI,EAAE,MAAM,EAAE,OAAO,EAAE,aAAa,GAAG,IAAI,CAE/E;AAED;;;GAGG;AACH,wBAAgB,mBAAmB,CA
AC,IAAI,EAAE,MAAM,EAAE,GAAG,CAAC,EAAE,MAAM,GAAG,SAAS,GAAG,aAAa,CAIzF;AAwCD,uDAAuD;AACvD,qBAAa,iBAAkB,YAAW,aAAa;IAC1C,OAAO,CAAC,QAAQ,CAAC,MAAM;IAAU,OAAO,CAAC,QAAQ,CAAC,OAAO,CAAC;gBAAzC,MAAM,EAAE,MAAM,EAAmB,OAAO,CAAC,EAAE,MAAM,YAAA;IAE9E,MAAM,CAAC,GAAG,EAAE,WAAW,GAAG,OAAO,CAAC,eAAe,EAAE,CAAC;CASpD;AAED,iDAAiD;AACjD,qBAAa,kBAAmB,YAAW,aAAa;IAC3C,OAAO,CAAC,QAAQ,CAAC,MAAM;gBAAN,MAAM,EAAE,MAAM;IAE3C,MAAM,CAAC,GAAG,EAAE,WAAW,GAAG,OAAO,CAAC,eAAe,EAAE,CAAC;CAQpD;AAED,8CAA8C;AAC9C,qBAAa,eAAgB,YAAW,aAAa;IACxC,OAAO,CAAC,QAAQ,CAAC,MAAM;gBAAN,MAAM,EAAE,MAAM;IAE3C,MAAM,CAAC,GAAG,EAAE,WAAW,GAAG,OAAO,CAAC,eAAe,EAAE,CAAC;CAGpD;AAED,+DAA+D;AAC/D,qBAAa,eAAgB,YAAW,aAAa;IACpD,MAAM,CAAC,GAAG,EAAE,WAAW,GAAG,OAAO,CAAC,eAAe,EAAE,CAAC;CAGpD;AAMD,MAAM,WAAW,2BAA2B;IAC3C;;;OAGG;IACH,eAAe,CAAC,EAAE,OAAO,CAAC;IAC1B;;;OAGG;IACH,eAAe,CAAC,EAAE,OAAO,CAAC;CAC1B;AAED;;;;;;;;;;;;;GAaG;AACH,qBAAa,oBAAqB,YAAW,aAAa;IAKxD,OAAO,CAAC,QAAQ,CAAC,OAAO;IAJzB,OAAO,CAAC,QAAQ,CAAC,eAAe,CAAU;IAC1C,OAAO,CAAC,QAAQ,CAAC,eAAe,CAAU;gBAGxB,OAAO,EAAE,aAAa,EAAE,EACzC,IAAI,GAAE,2BAAgC;IAOjC,MAAM,CAAC,GAAG,EAAE,WAAW,GAAG,OAAO,CAAC,eAAe,EAAE,CAAC;CAmB1D;AAMD;;;;;;;;GAQG;AACH,wBAAgB,mBAAmB,IAAI,aAAa,CAgBnD"}
|
|
@@ -0,0 +1,399 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Web search API integration — Brave Search, Tavily, Exa and DuckDuckGo.
|
|
3
|
+
*
|
|
4
|
+
* All engines return a normalised WebSearchResult[].
|
|
5
|
+
* API keys are read from environment variables by default:
|
|
6
|
+
* BRAVE_SEARCH_API_KEY
|
|
7
|
+
* TAVILY_API_KEY / EXA_API_KEY (the DuckDuckGo engine needs no key)
|
|
8
|
+
*/
|
|
9
|
+
/**
 * Query the Exa Search API (neural / semantic retrieval).
 * https://exa.ai/docs/reference/search
 *
 * Asks Exa for inline highlights (2 sentences each, up to 3 per URL) and
 * joins them into each result's snippet, so no extra round-trip is needed.
 * The request is aborted if `fetch` has not settled within 15 seconds.
 *
 * @param query Search query text.
 * @param opts  Optional settings: `apiKey` (falls back to the EXA_API_KEY
 *              environment variable), `numResults` (default 10) and `type`
 *              (default "auto").
 * @returns Normalised results with `url`, `title`, `snippet` and, when Exa
 *          supplies a `publishedDate`, `publishedAt`.
 * @throws Error when no API key is available or the response status is not OK.
 */
export async function exaSearch(query, opts = {}) {
    const key = opts.apiKey ?? process.env["EXA_API_KEY"];
    if (!key) {
        throw new Error("Exa API key required — set EXA_API_KEY or pass opts.apiKey");
    }
    const abort = new AbortController();
    const timeoutId = setTimeout(() => abort.abort(), 15_000);
    let response;
    try {
        const payload = {
            query,
            numResults: opts.numResults ?? 10,
            type: opts.type ?? "auto",
            contents: {
                highlights: { numSentences: 2, highlightsPerUrl: 3 },
            },
        };
        response = await fetch("https://api.exa.ai/search", {
            method: "POST",
            signal: abort.signal,
            headers: {
                "Content-Type": "application/json",
                "x-api-key": key,
            },
            body: JSON.stringify(payload),
        });
    }
    finally {
        // The timer only guards the fetch call itself.
        clearTimeout(timeoutId);
    }
    if (!response.ok) {
        throw new Error(`Exa API error: ${response.status} ${response.statusText}`);
    }
    const parsed = await response.json();
    const hits = parsed.results ?? [];
    return hits.map((hit) => {
        const entry = {
            url: hit.url,
            title: hit.title,
            snippet: hit.highlights?.join(" … ") ?? "",
        };
        if (hit.publishedDate) {
            entry.publishedAt = hit.publishedDate;
        }
        return entry;
    });
}
|
|
53
|
+
/**
 * Query the Brave Search web-search endpoint.
 * https://api.search.brave.com/app/documentation/web-search
 *
 * The request is aborted if `fetch` has not settled within 10 seconds.
 *
 * @param query Search query text.
 * @param opts  Optional settings: `apiKey` (falls back to the
 *              BRAVE_SEARCH_API_KEY environment variable), `numResults`
 *              (default 10, normalised to an integer in the range 1–20),
 *              `country` and `freshness` ("pd" | "pw" | "pm" | "py").
 * @returns Normalised results with `url`, `title`, `snippet` (Brave's
 *          `description`, or "") and, when Brave supplies `age`, `publishedAt`.
 * @throws Error when no API key is available or the response status is not OK.
 */
export async function braveSearch(query, opts = {}) {
    const apiKey = opts.apiKey ?? process.env["BRAVE_SEARCH_API_KEY"];
    if (!apiKey)
        throw new Error("Brave Search API key required — set BRAVE_SEARCH_API_KEY or pass opts.apiKey");
    // The documented range is 1–20. Clamp both ends and drop any fraction so
    // 0, negative, fractional or NaN values never reach the API verbatim.
    const requested = Number(opts.numResults ?? 10);
    const count = Number.isNaN(requested)
        ? 10
        : Math.min(Math.max(Math.trunc(requested), 1), 20);
    const params = new URLSearchParams({
        q: query,
        count: String(count),
    });
    if (opts.country)
        params.set("country", opts.country);
    if (opts.freshness)
        params.set("freshness", opts.freshness);
    const controller = new AbortController();
    const timer = setTimeout(() => controller.abort(), 10_000);
    let res;
    try {
        res = await fetch(`https://api.search.brave.com/res/v1/web/search?${params}`, {
            signal: controller.signal,
            headers: {
                Accept: "application/json",
                "Accept-Encoding": "gzip",
                "X-Subscription-Token": apiKey,
            },
        });
    }
    finally {
        // The timer only guards the fetch call itself.
        clearTimeout(timer);
    }
    if (!res.ok)
        throw new Error(`Brave Search API error: ${res.status} ${res.statusText}`);
    const data = (await res.json());
    return (data.web?.results ?? []).map((r) => ({
        url: r.url,
        title: r.title,
        snippet: r.description ?? "",
        ...(r.age ? { publishedAt: r.age } : {}),
    }));
}
|
|
95
|
+
/**
 * Query the Tavily search API.
 * https://docs.tavily.com/docs/rest-api/api-reference
 *
 * The API key travels in the JSON request body. `time_range` and `topic`
 * are only included when the corresponding option is set. The request is
 * aborted if `fetch` has not settled within 15 seconds.
 *
 * @param query Search query text.
 * @param opts  Optional settings: `apiKey` (falls back to the TAVILY_API_KEY
 *              environment variable), `numResults` (default 5), `depth`
 *              (default "basic"), `timeRange` and `topic`.
 * @returns Normalised results with `url`, `title`, `snippet` (Tavily's
 *          `content`, or "") and, when present, `publishedAt`.
 * @throws Error when no API key is available or the response status is not OK.
 */
export async function tavilySearch(query, opts = {}) {
    const key = opts.apiKey ?? process.env["TAVILY_API_KEY"];
    if (!key) {
        throw new Error("Tavily API key required — set TAVILY_API_KEY or pass opts.apiKey");
    }
    const abort = new AbortController();
    const timeoutId = setTimeout(() => abort.abort(), 15_000);
    let response;
    try {
        const payload = {
            query,
            api_key: key,
            max_results: opts.numResults ?? 5,
            search_depth: opts.depth ?? "basic",
            include_raw_content: false,
        };
        if (opts.timeRange) {
            payload.time_range = opts.timeRange;
        }
        if (opts.topic) {
            payload.topic = opts.topic;
        }
        response = await fetch("https://api.tavily.com/search", {
            method: "POST",
            signal: abort.signal,
            headers: { "Content-Type": "application/json" },
            body: JSON.stringify(payload),
        });
    }
    finally {
        // The timer only guards the fetch call itself.
        clearTimeout(timeoutId);
    }
    if (!response.ok) {
        throw new Error(`Tavily API error: ${response.status} ${response.statusText}`);
    }
    const parsed = await response.json();
    const hits = parsed.results ?? [];
    return hits.map((hit) => {
        const entry = {
            url: hit.url,
            title: hit.title,
            snippet: hit.content ?? "",
        };
        if (hit.published_date) {
            entry.publishedAt = hit.published_date;
        }
        return entry;
    });
}
|
|
135
|
+
/**
 * Search via the DuckDuckGo Instant Answer API.
 * https://duckduckgo.com/api
 *
 * No API key required. Maps the instant-answer payload to WebSearchResult[]
 * in this order: the Abstract (if any), the `Results` list, then
 * `RelatedTopics` with one level of nested `Topics` flattened.
 *
 * Limitation: not a full web index — best for well-known entities and
 * unambiguous queries. Returns empty when DDG has no instant answer.
 *
 * @param query Search query text.
 * @param opts  Optional settings: `numResults` caps the returned list
 *              client-side (default 10); a value of 0 or less yields [].
 * @returns At most `numResults` results, each with `url`, `title`, `snippet`.
 * @throws Error when the response status is not OK.
 */
export async function ddgSearch(query, opts = {}) {
    const params = new URLSearchParams({
        q: query,
        format: "json",
        no_redirect: "1",
        no_html: "1",
        skip_disambig: "1",
    });
    const controller = new AbortController();
    const timer = setTimeout(() => controller.abort(), 10_000);
    let res;
    try {
        res = await fetch(`https://api.duckduckgo.com/?${params}`, {
            signal: controller.signal,
            headers: {
                Accept: "application/json",
                // DDG silently returns an empty 200 body for browser-like or
                // missing User-Agents. A curl/bot-style UA gets a real 202.
                "User-Agent": "web-spider/0.8",
            },
        });
    }
    finally {
        // The timer only guards the fetch call itself.
        clearTimeout(timer);
    }
    if (!res.ok)
        throw new Error(`DDG API error: ${res.status} ${res.statusText}`);
    const data = (await res.json());
    const results = [];
    const limit = opts.numResults ?? 10;
    const hasRoom = () => results.length < limit;
    // 1. Instant answer abstract (Wikipedia-style knowledge panel).
    //    Subject to the limit like every other entry.
    if (hasRoom() && data.Abstract && data.AbstractURL) {
        results.push({
            url: data.AbstractURL,
            title: data.Heading ?? data.AbstractSource ?? "DuckDuckGo",
            snippet: data.Abstract,
        });
    }
    // 2. Official results (e.g. official site links)
    for (const r of data.Results ?? []) {
        if (!hasRoom())
            break;
        if (r.FirstURL)
            results.push({ url: r.FirstURL, title: r.Text, snippet: r.Text });
    }
    // 3. Related topics — flatten one level of nesting
    for (const topic of data.RelatedTopics ?? []) {
        if (!hasRoom())
            break;
        if (topic.FirstURL && topic.Text) {
            results.push({ url: topic.FirstURL, title: topic.Text, snippet: topic.Text });
        }
        for (const sub of topic.Topics ?? []) {
            if (!hasRoom())
                break;
            // Same completeness guard as top-level topics: never emit a
            // result without a URL or text.
            if (sub.FirstURL && sub.Text) {
                results.push({ url: sub.FirstURL, title: sub.Text, snippet: sub.Text });
            }
        }
    }
    return results;
}
|
|
205
|
+
/**
 * One-call search: use the explicitly requested engine, or else the default
 * fallback chain built from whichever API keys are present.
 *
 * When `opts.engine` is set, the engine is resolved from the registry and
 * handed the value of the matching environment variable as its key. When it
 * is not set, {@link defaultSearchEngine} supplies the engine.
 *
 * Prefer {@link defaultSearchEngine} + {@link FallbackSearchEngine} when
 * you need composable retry / fallback behaviour.
 *
 * @param query Search query text.
 * @param opts  Optional `engine`, `numResults`, `timeRange` and `topic`;
 *              the last three are forwarded unchanged in the search request.
 * @returns Whatever the selected engine's `search()` resolves to.
 */
export async function webSearch(query, opts = {}) {
    let engine;
    if (opts.engine) {
        const envName = envKeyForEngine(opts.engine);
        engine = resolveSearchEngine(opts.engine, process.env[envName]);
    }
    else {
        engine = defaultSearchEngine();
    }
    const request = {
        query,
        numResults: opts.numResults,
        timeRange: opts.timeRange,
        topic: opts.topic,
    };
    return engine.search(request);
}
|
|
224
|
+
/** The global engine registry. Seeded with built-in engines below. */
|
|
225
|
+
const ENGINE_REGISTRY = new Map();
|
|
226
|
+
/**
 * Add (or replace) an engine factory in the global registry.
 *
 * New engines can be plugged in this way without editing existing code.
 * Registering a name that already exists overwrites the earlier factory.
 *
 * @param name    Registry key later passed to resolveSearchEngine().
 * @param factory Function that receives the API key (possibly undefined)
 *                and returns an ISearchEngine.
 * @example
 * registerSearchEngine("my-engine", (key) => new MyEngine(key!))
 */
export function registerSearchEngine(name, factory) {
    const registry = ENGINE_REGISTRY;
    registry.set(name, factory);
}
|
|
236
|
+
/**
 * Look up a registered engine factory by name and invoke it with `key`.
 *
 * @param name Registry key used when the engine was registered.
 * @param key  API key handed to the factory; may be undefined.
 * @returns The ISearchEngine produced by the factory.
 * @throws Error when no factory is registered under `name`. Any error the
 *         factory itself throws (for example a missing required key)
 *         propagates unchanged.
 */
export function resolveSearchEngine(name, key) {
    const factory = ENGINE_REGISTRY.get(name);
    if (factory) {
        return factory(key);
    }
    throw new Error(`Unknown search engine: "${name}". Register it with registerSearchEngine().`);
}
|
|
246
|
+
/**
 * @internal Map a built-in engine name to the environment variable that
 * holds its API key (used by webSearch when an engine is requested by name).
 *
 * A switch is used instead of an object-literal lookup so names that collide
 * with inherited object members ("constructor", "toString", …) yield ""
 * rather than a non-string value.
 *
 * @param name Engine name.
 * @returns The environment variable name, or "" for keyless or unknown engines.
 */
function envKeyForEngine(name) {
    switch (name) {
        case "brave":
            return "BRAVE_SEARCH_API_KEY";
        case "tavily":
            return "TAVILY_API_KEY";
        case "exa":
            return "EXA_API_KEY";
        default:
            return "";
    }
}
|
|
255
|
+
// Built-in engines are seeded into the registry here.
// Adding a new engine: call registerSearchEngine() — do NOT edit this block.
// Each keyed factory throws when invoked without an API key; the DDG factory
// ignores the key. Factories construct lazily, so the adapter classes are
// only touched when an engine is actually resolved.
registerSearchEngine("brave", (key) => {
    if (key) {
        return new BraveSearchEngine(key);
    }
    throw new Error("BRAVE_SEARCH_API_KEY not set");
});
registerSearchEngine("tavily", (key) => {
    if (key) {
        return new TavilySearchEngine(key);
    }
    throw new Error("TAVILY_API_KEY not set");
});
registerSearchEngine("exa", (key) => {
    if (key) {
        return new ExaSearchEngine(key);
    }
    throw new Error("EXA_API_KEY not set");
});
registerSearchEngine("ddg", () => {
    return new DdgSearchEngine();
});
|
|
273
|
+
// ---------------------------------------------------------------------------
|
|
274
|
+
// ISearchEngine adapters — concrete implementations of the port
|
|
275
|
+
// ---------------------------------------------------------------------------
|
|
276
|
+
/** Maps the canonical timeRange string to Brave's freshness parameter. */
|
|
277
|
+
const BRAVE_FRESHNESS = {
|
|
278
|
+
day: "pd",
|
|
279
|
+
week: "pw",
|
|
280
|
+
month: "pm",
|
|
281
|
+
year: "py",
|
|
282
|
+
};
|
|
283
|
+
/**
 * ISearchEngine adapter around braveSearch().
 *
 * Holds the API key and an optional country code, and translates the
 * request's `timeRange` into Brave's `freshness` code via BRAVE_FRESHNESS.
 */
export class BraveSearchEngine {
    /**
     * @param apiKey  Brave Search API key passed on every request.
     * @param country Optional country code forwarded as `country`.
     */
    constructor(apiKey, country) {
        this.apiKey = apiKey;
        this.country = country;
    }
    /**
     * Run a search through Brave.
     * @param req Search request; `query`, `numResults` and `timeRange` are used.
     * @returns The promise returned by braveSearch().
     */
    search(req) {
        // No timeRange means no freshness filter at all.
        let freshness;
        if (req.timeRange) {
            freshness = BRAVE_FRESHNESS[req.timeRange];
        }
        const options = {
            apiKey: this.apiKey,
            numResults: req.numResults,
            country: this.country,
            freshness,
        };
        return braveSearch(req.query, options);
    }
}
|
|
299
|
+
/**
 * ISearchEngine adapter around tavilySearch().
 *
 * Holds the API key and forwards `numResults`, `timeRange` and `topic`
 * from the request unchanged.
 */
export class TavilySearchEngine {
    /** @param apiKey Tavily API key passed on every request. */
    constructor(apiKey) {
        this.apiKey = apiKey;
    }
    /**
     * Run a search through Tavily.
     * @param req Search request; `query`, `numResults`, `timeRange` and
     *            `topic` are used.
     * @returns The promise returned by tavilySearch().
     */
    search(req) {
        const options = {
            apiKey: this.apiKey,
            numResults: req.numResults,
            timeRange: req.timeRange,
            topic: req.topic,
        };
        return tavilySearch(req.query, options);
    }
}
|
|
313
|
+
/**
 * ISearchEngine adapter around exaSearch().
 *
 * Holds the API key; only `query` and `numResults` from the request are
 * forwarded.
 */
export class ExaSearchEngine {
    /** @param apiKey Exa API key passed on every request. */
    constructor(apiKey) {
        this.apiKey = apiKey;
    }
    /**
     * Run a search through Exa.
     * @param req Search request; `query` and `numResults` are used.
     * @returns The promise returned by exaSearch().
     */
    search(req) {
        const options = { apiKey: this.apiKey, numResults: req.numResults };
        return exaSearch(req.query, options);
    }
}
|
|
322
|
+
/**
 * ISearchEngine adapter around ddgSearch() — no API key required.
 *
 * Only `query` and `numResults` from the request are forwarded.
 */
export class DdgSearchEngine {
    /**
     * Run a search through the DuckDuckGo Instant Answer API.
     * @param req Search request; `query` and `numResults` are used.
     * @returns The promise returned by ddgSearch().
     */
    search(req) {
        const options = { numResults: req.numResults };
        return ddgSearch(req.query, options);
    }
}
|
|
328
|
+
/**
 * A composite ISearchEngine that tries each engine in order, falling back
 * to the next when the current one returns empty results or throws.
 *
 * Because it implements ISearchEngine itself it is fully composable —
 * nest FallbackSearchEngines, wrap them in caches, inject stubs in tests.
 *
 * @example
 * // Tavily with DDG as zero-cost fallback
 * const engine = new FallbackSearchEngine([
 *   new TavilySearchEngine(process.env.TAVILY_API_KEY),
 *   new DdgSearchEngine(),
 * ]);
 */
export class FallbackSearchEngine {
    /**
     * @param engines Engines to try, in priority order. Must contain at
     *                least one engine.
     * @param opts    `fallbackOnEmpty` (default true): treat an empty result
     *                list as a miss and move on. `fallbackOnError` (default
     *                true): swallow a thrown error and move on.
     * @throws Error when `engines` is empty.
     */
    constructor(engines, opts = {}) {
        // Snapshot the chain so later mutation of the caller's array cannot
        // reorder it or empty it behind the non-empty check below.
        this.engines = Array.from(engines);
        if (this.engines.length === 0)
            throw new Error("FallbackSearchEngine requires at least one engine");
        this.fallbackOnEmpty = opts.fallbackOnEmpty ?? true;
        this.fallbackOnError = opts.fallbackOnError ?? true;
    }
    /**
     * Run the request against each engine until one yields a usable answer.
     *
     * @param req Search request forwarded unchanged to every engine.
     * @returns The first non-empty result list, or the first result list of
     *          any size when `fallbackOnEmpty` is false. Resolves to [] when
     *          every engine returned empty and none failed.
     * @throws The error from the engine when `fallbackOnError` is false;
     *         otherwise the most recent error once all engines are exhausted
     *         and at least one of them failed.
     */
    async search(req) {
        // Track failure separately from the error value so that a falsy
        // thrown value (e.g. `throw 0`) is still surfaced to the caller.
        let failed = false;
        let lastError;
        for (const engine of this.engines) {
            try {
                const results = await engine.search(req);
                if (results.length > 0 || !this.fallbackOnEmpty)
                    return results;
                // Empty + fallbackOnEmpty → try next engine
            }
            catch (err) {
                if (!this.fallbackOnError)
                    throw err;
                failed = true;
                lastError = err;
                // Error + fallbackOnError → try next engine
            }
        }
        // All engines exhausted — surface the last error or return empty
        if (failed)
            throw lastError;
        return [];
    }
}
|
|
372
|
+
// ---------------------------------------------------------------------------
|
|
373
|
+
// Wiring — compose engines from environment variables
|
|
374
|
+
// ---------------------------------------------------------------------------
|
|
375
|
+
/**
 * Assemble the default engine chain from the environment.
 *
 * Keyed engines are added in priority order Brave → Tavily → Exa, each only
 * when its environment variable holds a non-empty value. DuckDuckGo is
 * always appended last as the zero-cost fallback, so the chain is never
 * empty.
 *
 * The returned engine implements ISearchEngine — swap it for any stub
 * in tests without touching call sites.
 *
 * @returns A FallbackSearchEngine wrapping the assembled chain.
 */
export function defaultSearchEngine() {
    // [environment variable, adapter class] in priority order.
    const keyed = [
        ["BRAVE_SEARCH_API_KEY", BraveSearchEngine],
        ["TAVILY_API_KEY", TavilySearchEngine],
        ["EXA_API_KEY", ExaSearchEngine],
    ];
    const chain = [];
    for (const [envName, Engine] of keyed) {
        const key = process.env[envName];
        if (key) {
            chain.push(new Engine(key));
        }
    }
    // DDG always last — no key needed, never throws the "no key" error
    chain.push(new DdgSearchEngine());
    return new FallbackSearchEngine(chain);
}
|
|
399
|
+
//# sourceMappingURL=web-search.js.map
|