@counterposition/pi-web-search 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE.md +675 -0
- package/README.md +30 -0
- package/extensions/web-search.ts +249 -0
- package/package.json +60 -0
- package/src/config.ts +281 -0
- package/src/format.ts +348 -0
- package/src/page-cache.ts +58 -0
- package/src/pi-ambient.d.ts +30 -0
- package/src/provider-utils.ts +269 -0
- package/src/providers/brave.ts +292 -0
- package/src/providers/exa.ts +227 -0
- package/src/providers/firecrawl.ts +67 -0
- package/src/providers/index.ts +38 -0
- package/src/providers/jina.ts +131 -0
- package/src/providers/serper.ts +193 -0
- package/src/providers/tavily.ts +231 -0
- package/src/types.ts +131 -0
- package/src/url-safety.ts +92 -0
|
@@ -0,0 +1,292 @@
|
|
|
1
|
+
import {
|
|
2
|
+
addSiteConstraint,
|
|
3
|
+
dedupeResultsByUrl,
|
|
4
|
+
fetchJson,
|
|
5
|
+
hostnameFromUrl,
|
|
6
|
+
MAX_RESPONSE_BYTES,
|
|
7
|
+
normalizeIsoDate,
|
|
8
|
+
ProviderError,
|
|
9
|
+
TIMEOUTS,
|
|
10
|
+
truncateSnippet,
|
|
11
|
+
} from "../provider-utils.js";
|
|
12
|
+
import type {
|
|
13
|
+
ProviderSearchResponse,
|
|
14
|
+
SearchProvider,
|
|
15
|
+
SearchProviderArgs,
|
|
16
|
+
SearchResult,
|
|
17
|
+
} from "../types.js";
|
|
18
|
+
import { normalizeDomains } from "../config.js";
|
|
19
|
+
|
|
20
|
+
// Base URL of the Brave web-search endpoint; buildUrl() appends the query parameters to it.
const BRAVE_SEARCH_URL = "https://api.search.brave.com/res/v1/web/search";
|
|
21
|
+
// Upper bound applied to the `count` query parameter when a request URL is built.
const MAX_BRAVE_RESULTS = 20;
|
|
22
|
+
|
|
23
|
+
/**
 * Loosely-typed view of a single entry in Brave's `web.results` array.
 *
 * Every field is optional and typed `unknown` because the payload is untrusted
 * JSON; each value is narrowed at the point where it is read.
 */
type BraveResult = Partial<
  Record<
    | "title"
    | "url"
    | "description"
    | "snippet"
    | "publishedDate"
    | "publishedAt"
    | "date"
    | "age",
    unknown
  >
>;
|
|
33
|
+
|
|
34
|
+
/**
 * Translates the provider-agnostic freshness value into the code sent as
 * Brave's `freshness` query parameter.
 *
 * @param value - Requested freshness window, if any.
 * @returns `"pd"`, `"pw"`, `"pm"` or `"py"` for day/week/month/year, and
 *          `undefined` for anything else (including no freshness at all).
 */
function freshnessToBrave(value: SearchProviderArgs["freshness"]): string | undefined {
  if (value === "day") return "pd";
  if (value === "week") return "pw";
  if (value === "month") return "pm";
  if (value === "year") return "py";
  return undefined;
}
|
|
48
|
+
|
|
49
|
+
/**
 * Type guard for "plain" JSON objects: accepts any non-null object that is
 * not an array.
 */
function isObject(value: unknown): value is Record<string, unknown> {
  if (value === null || typeof value !== "object") {
    return false;
  }
  return !Array.isArray(value);
}
|
|
52
|
+
|
|
53
|
+
/**
 * Validates the top-level shape of a Brave response and converts its
 * `web.results` entries into `SearchResult` objects.
 *
 * @param payload - Decoded JSON body returned by Brave.
 * @returns The entries that `parseResult` could convert, in response order.
 * @throws ProviderError (non-transient) when `payload.web.results` is missing
 *         or is not an array.
 */
function parseResults(payload: unknown): SearchResult[] {
  if (!isObject(payload) || !isObject(payload.web) || !Array.isArray(payload.web.results)) {
    throw new ProviderError({
      provider: "brave",
      message: "Brave returned unexpected response shape.",
      transient: false,
    });
  }

  const results: SearchResult[] = [];
  for (const raw of payload.web.results as unknown[]) {
    // The array elements are untrusted: a null or primitive entry would make
    // parseResult throw a TypeError on property access, so skip it instead.
    if (!isObject(raw)) continue;

    const parsed = parseResult(raw);
    if (parsed) results.push(parsed);
  }
  return results;
}
|
|
69
|
+
|
|
70
|
+
/**
 * Converts one raw Brave result into a `SearchResult`.
 *
 * @param raw - Untrusted entry from `web.results`.
 * @returns The normalized result, or `undefined` when the entry has no usable
 *          title or URL.
 */
function parseResult(raw: BraveResult): SearchResult | undefined {
  const title = typeof raw.title === "string" ? raw.title.trim() : "";
  const url = typeof raw.url === "string" ? raw.url.trim() : "";
  if (!title || !url) return undefined;

  const sourceDomain = hostnameFromUrl(url);

  // Brave reports the page date in `page_age`; `age` is a human-readable
  // variant, so it is only used as the last resort.
  const pageAge = (raw as Record<string, unknown>).page_age;
  const publishedAt = normalizeIsoDate(
    firstString(raw.publishedDate, raw.publishedAt, raw.date, pageAge, raw.age),
  );

  return {
    title,
    url,
    snippet: truncateSnippet(firstString(raw.description, raw.snippet) ?? "", 500),
    sourceDomain,
    publishedAt,
  };
}
|
|
89
|
+
|
|
90
|
+
/**
 * Picks the first argument that is a string with non-whitespace content.
 *
 * @param values - Candidates, checked in order.
 * @returns The trimmed winning string, or `undefined` when none qualifies.
 */
function firstString(...values: unknown[]): string | undefined {
  for (const candidate of values) {
    if (typeof candidate !== "string") continue;

    const trimmed = candidate.trim();
    if (trimmed) return trimmed;
  }
  return undefined;
}
|
|
96
|
+
|
|
97
|
+
/**
 * Builds the full Brave search request URL.
 *
 * - `q` is the query, passed through `addSiteConstraint` when a domain is given.
 * - `count` is `maxResults` clamped to the range 1..MAX_BRAVE_RESULTS.
 * - `freshness` is added only when `freshnessToBrave` yields a code.
 *
 * @returns The serialized URL string.
 */
function buildUrl(args: {
  query: string;
  maxResults: number;
  freshness?: SearchProviderArgs["freshness"];
  domain?: string;
}): string {
  const { query, maxResults, domain } = args;
  const endpoint = new URL(BRAVE_SEARCH_URL);
  const params = endpoint.searchParams;

  const effectiveQuery = domain ? addSiteConstraint(query, domain) : query;
  params.set("q", effectiveQuery);

  const clampedCount = Math.min(Math.max(maxResults, 1), MAX_BRAVE_RESULTS);
  params.set("count", String(clampedCount));

  const freshnessCode = freshnessToBrave(args.freshness);
  if (freshnessCode) {
    params.set("freshness", freshnessCode);
  }

  return endpoint.toString();
}
|
|
114
|
+
|
|
115
|
+
/**
 * Performs a single Brave search request and returns the parsed results.
 *
 * The request is a GET authenticated with the `X-Subscription-Token` header;
 * the response body is validated and converted by `parseResults`.
 *
 * @returns A response containing only `results`; callers attach filter
 *          metadata and notes.
 */
async function searchOnce(args: {
  query: string;
  maxResults: number;
  freshness?: SearchProviderArgs["freshness"];
  domain?: string;
  signal: AbortSignal;
  apiKey: string;
}): Promise<ProviderSearchResponse> {
  const { apiKey, signal } = args;
  const requestUrl = buildUrl(args);

  const results = await fetchJson<SearchResult[]>("brave", requestUrl, {
    method: "GET",
    headers: {
      Accept: "application/json",
      "X-Subscription-Token": apiKey,
    },
    signal,
    timeoutMs: TIMEOUTS.searchBasicMs,
    maxBytes: MAX_RESPONSE_BYTES.search,
    validate: parseResults,
  });

  return { results };
}
|
|
140
|
+
|
|
141
|
+
/**
 * Runs a domain-restricted Brave search.
 *
 * With exactly one domain a single request is issued with a `site:` rewrite.
 * With several domains one request per domain is issued concurrently and the
 * per-domain result lists are merged.
 *
 * Merge behavior:
 * - Results are interleaved by rank (first hit of every domain, then second
 *   hit of every domain, ...) before the `maxResults` cap is applied, so a
 *   single productive domain cannot crowd out all the others.
 * - Partial failures are tolerated and reported through `notes`.
 * - If no results were collected and at least one request failed, the first
 *   failure is rethrown. If every request succeeded but nothing matched, an
 *   empty result list is returned rather than an error.
 *
 * @returns Results capped/deduplicated via `dedupeResultsByUrl`, plus applied
 *          filter metadata and notes.
 * @throws The abort error as soon as any per-domain request was aborted, or
 *         the first per-domain failure when no results could be collected.
 */
async function searchForDomains(args: {
  query: string;
  maxResults: number;
  freshness?: SearchProviderArgs["freshness"];
  domains: string[];
  signal: AbortSignal;
  apiKey: string;
}): Promise<ProviderSearchResponse> {
  const { domains, maxResults } = args;

  const searchDomain = (domain: string | undefined): Promise<ProviderSearchResponse> =>
    searchOnce({
      query: args.query,
      maxResults,
      freshness: args.freshness,
      domain,
      signal: args.signal,
      apiKey: args.apiKey,
    });

  if (domains.length === 1) {
    const onlyDomain = domains[0];
    const response = await searchDomain(onlyDomain);

    return {
      ...response,
      // Same dedupe/cap step as the unfiltered and fan-out paths.
      results: dedupeResultsByUrl(response.results, maxResults),
      appliedFilters: {
        freshness: args.freshness ? "native" : undefined,
        domains: "query_rewrite",
      },
      notes: [`Brave applied the domain filter with a site: rewrite for ${onlyDomain}.`],
    };
  }

  const settled = await Promise.allSettled(domains.map((domain) => searchDomain(domain)));

  const perDomainResults: SearchResult[][] = [];
  const failures: Error[] = [];

  for (const entry of settled) {
    if (entry.status === "fulfilled") {
      perDomainResults.push(entry.value.results);
      continue;
    }

    const error = entry.reason instanceof Error ? entry.reason : new Error(String(entry.reason));
    // Cancellation is never a partial failure: propagate it unchanged.
    if (isAbortError(error)) {
      throw error;
    }
    failures.push(error);
  }

  // Round-robin merge so every domain contributes before the cap cuts in.
  const merged: SearchResult[] = [];
  const deepestRank = Math.max(0, ...perDomainResults.map((list) => list.length));
  for (let rank = 0; rank < deepestRank; rank += 1) {
    for (const list of perDomainResults) {
      const hit = list[rank];
      if (hit) merged.push(hit);
    }
  }

  // Only treat "no results" as an error when something actually failed;
  // a clean run with zero matches is a valid, empty answer.
  const firstFailure = failures[0];
  if (merged.length === 0 && firstFailure) {
    throw firstFailure;
  }

  const notes: string[] = [];
  if (failures.length > 0) {
    notes.push(
      `Brave could not complete all per-domain searches for ${domains.join(", ")}; results may be incomplete.`,
    );
  }

  return {
    results: dedupeResultsByUrl(merged, maxResults),
    appliedFilters: {
      freshness: args.freshness ? "native" : undefined,
      domains: "fanout_merge",
    },
    notes,
  };
}
|
|
225
|
+
|
|
226
|
+
/**
 * Creates the Brave search provider.
 *
 * @param apiKey - Brave subscription token; surrounding whitespace is ignored.
 * @returns A provider whose `search` dispatches to the domain-aware path when
 *          domains are requested and to a single plain request otherwise.
 * @throws ProviderError (non-transient) when the key is empty after trimming.
 */
export function createBraveProvider(apiKey: string): SearchProvider {
  const key = apiKey.trim();
  if (key.length === 0) {
    throw new ProviderError({
      provider: "brave",
      message: "Brave API key is not configured.",
      transient: false,
    });
  }

  return {
    name: "brave",
    capabilities: new Set(["search", "freshness"]),
    async search(args: SearchProviderArgs): Promise<ProviderSearchResponse> {
      const { query, maxResults, freshness, signal } = args;
      const domains = normalizeDomains(args.domains);

      try {
        if (domains && domains.length > 0) {
          return await searchForDomains({
            query,
            maxResults,
            freshness,
            domains,
            signal,
            apiKey: key,
          });
        }

        const { results } = await searchOnce({
          query,
          maxResults,
          freshness,
          signal,
          apiKey: key,
        });

        return {
          results: dedupeResultsByUrl(results, maxResults),
          appliedFilters: freshness ? { freshness: "native" } : undefined,
          notes: freshness ? ["Brave applied the requested freshness filter natively."] : undefined,
        };
      } catch (error) {
        // Cancellations and already-classified provider errors pass through untouched.
        const cancelled = isAbortError(error) || signal.aborted;
        if (cancelled || error instanceof ProviderError) {
          throw error;
        }

        // Anything else is wrapped so callers always see a ProviderError.
        const message =
          error instanceof Error && error.message ? error.message : "Brave request failed.";
        throw new ProviderError({
          provider: "brave",
          message,
          transient: false,
          cause: error,
        });
      }
    },
  };
}
|
|
284
|
+
|
|
285
|
+
// The factory is also exposed as the module's default export.
export default createBraveProvider;
|
|
286
|
+
|
|
287
|
+
/**
 * Reports whether a thrown value represents a cancelled request, i.e. a
 * `DOMException` or `Error` instance whose `name` is `"AbortError"`.
 */
function isAbortError(error: unknown): boolean {
  if (!(error instanceof DOMException) && !(error instanceof Error)) {
    return false;
  }
  return error.name === "AbortError";
}
|
|
@@ -0,0 +1,227 @@
|
|
|
1
|
+
import {
|
|
2
|
+
MAX_RESPONSE_BYTES,
|
|
3
|
+
ProviderError,
|
|
4
|
+
TIMEOUTS,
|
|
5
|
+
fetchJson,
|
|
6
|
+
hostnameFromUrl,
|
|
7
|
+
normalizeIsoDate,
|
|
8
|
+
truncateSnippet,
|
|
9
|
+
} from "../provider-utils.js";
|
|
10
|
+
import type {
|
|
11
|
+
AppliedFilters,
|
|
12
|
+
ProviderSearchResponse,
|
|
13
|
+
SearchCapability,
|
|
14
|
+
SearchFreshness,
|
|
15
|
+
SearchProvider,
|
|
16
|
+
SearchProviderArgs,
|
|
17
|
+
SearchResult,
|
|
18
|
+
} from "../types.js";
|
|
19
|
+
|
|
20
|
+
// URL that every Exa search request in this provider is POSTed to.
const EXA_ENDPOINT = "https://api.exa.ai/search";
|
|
21
|
+
|
|
22
|
+
/**
 * Loosely-typed view of one Exa result entry. All fields are optional and
 * `unknown` because they come from untrusted JSON and are narrowed on read.
 */
type ExaResult = Partial<
  Record<"title" | "url" | "text" | "highlights" | "publishedDate", unknown>
>;
|
|
29
|
+
|
|
30
|
+
/**
 * Shape guaranteed by `validateResponseShape`: a `results` array whose
 * individual entries have not been validated yet.
 */
type ExaResponse = {
  results: Array<unknown>;
};
|
|
33
|
+
|
|
34
|
+
// Capability flags advertised by the Exa provider; shared by every provider instance.
const CAPABILITIES: Set<SearchCapability> = new Set([
  "search",
  "content",
  "semantic",
  "freshness",
  "domainFilter",
  "resultDates",
]);
|
|
42
|
+
|
|
43
|
+
/**
 * Creates the Exa search provider.
 *
 * @param apiKey - Exa API key; surrounding whitespace is ignored.
 * @returns A provider whose `search` POSTs the request body built by
 *          `buildRequestBody` and normalizes the returned entries.
 * @throws ProviderError (non-transient) when the key is empty after trimming.
 */
export function createExaProvider(apiKey: string): SearchProvider {
  const key = apiKey.trim();
  if (key.length === 0) {
    throw new ProviderError({
      provider: "exa",
      message: "Exa API key is not configured.",
      transient: false,
    });
  }

  return {
    name: "exa",
    capabilities: CAPABILITIES,
    async search(args: SearchProviderArgs): Promise<ProviderSearchResponse> {
      const serializedBody = JSON.stringify(buildRequestBody(args));
      // Requests that also pull page content get the longer timeout.
      const timeoutMs = args.includeContent
        ? TIMEOUTS.searchThoroughMs
        : TIMEOUTS.searchBasicMs;

      const payload = await fetchJson<ExaResponse>("exa", EXA_ENDPOINT, {
        method: "POST",
        headers: {
          "Content-Type": "application/json",
          "x-api-key": key,
        },
        body: serializedBody,
        signal: args.signal,
        timeoutMs,
        maxBytes: MAX_RESPONSE_BYTES.search,
        validate: validateResponseShape,
      });

      const usedFreshness = Boolean(args.freshness);
      const usedDomains = Boolean(args.domains?.length);

      // Report filter metadata only when at least one filter was requested.
      let appliedFilters: AppliedFilters | undefined;
      if (usedFreshness || usedDomains) {
        appliedFilters = {
          freshness: usedFreshness ? "native" : undefined,
          domains: usedDomains ? "native" : undefined,
        };
      }

      return {
        results: normalizeResults(payload.results, args.includeContent, args.maxResults),
        appliedFilters,
        notes: buildNotes(args),
      };
    },
  };
}
|
|
86
|
+
|
|
87
|
+
// The factory is also exposed as the module's default export.
export default createExaProvider;
|
|
88
|
+
|
|
89
|
+
/**
 * Assembles the JSON body for an Exa search request.
 *
 * Always sends `query`, `numResults` and `type: "auto"`. Optional keys are
 * added only when requested: `includeDomains` for a non-empty domain list,
 * `startPublishedDate` for a freshness window, and `contents.text` (capped at
 * 3000 characters) when page content is wanted.
 */
function buildRequestBody(args: SearchProviderArgs): Record<string, unknown> {
  const startPublishedDate = freshnessToStartPublishedDate(args.freshness);

  return {
    query: args.query,
    numResults: args.maxResults,
    type: "auto",
    ...(args.domains?.length ? { includeDomains: args.domains } : {}),
    ...(startPublishedDate ? { startPublishedDate } : {}),
    ...(args.includeContent ? { contents: { text: { maxCharacters: 3_000 } } } : {}),
  };
}
|
|
115
|
+
|
|
116
|
+
/**
 * Checks that an Exa response is an object carrying a `results` array.
 *
 * @returns A fresh object holding only the `results` array.
 * @throws Error when the value is not a plain object or `results` is not an array.
 */
function validateResponseShape(value: unknown): ExaResponse {
  const results = isPlainObject(value) ? value.results : undefined;
  if (!Array.isArray(results)) {
    throw new Error("Exa returned unexpected response shape");
  }
  return { results };
}
|
|
128
|
+
|
|
129
|
+
/**
 * Converts raw Exa entries into `SearchResult` objects.
 *
 * Entries that are not plain objects, or that lack a non-empty string title
 * or URL, are skipped. Optional fields (`sourceDomain`, `publishedAt`,
 * `content`) are set only when a value is available.
 *
 * @param results - Unvalidated entries from the Exa response.
 * @param includeContent - When true, copies the trimmed `text` into `content`.
 * @param maxResults - Maximum number of results to return; a non-positive
 *                     value yields an empty list.
 * @returns At most `maxResults` normalized results, in response order.
 */
function normalizeResults(
  results: unknown[],
  includeContent: boolean,
  maxResults: number,
): SearchResult[] {
  const normalized: SearchResult[] = [];

  for (const entry of results) {
    // Enforce the cap before converting the next entry so that
    // maxResults <= 0 returns nothing instead of leaking one result.
    if (normalized.length >= maxResults) break;
    if (!isPlainObject(entry)) continue;

    const title = typeof entry.title === "string" ? entry.title.trim() : "";
    const url = typeof entry.url === "string" ? entry.url.trim() : "";
    if (!title || !url) continue;

    const searchResult: SearchResult = {
      title,
      url,
      snippet: extractSnippet(entry),
    };

    const sourceDomain = hostnameFromUrl(url);
    if (sourceDomain) {
      searchResult.sourceDomain = sourceDomain;
    }

    const publishedAt = normalizeIsoDate(
      typeof entry.publishedDate === "string" ? entry.publishedDate : undefined,
    );
    if (publishedAt) {
      searchResult.publishedAt = publishedAt;
    }

    if (includeContent) {
      const content = typeof entry.text === "string" ? entry.text.trim() : "";
      if (content) {
        searchResult.content = content;
      }
    }

    normalized.push(searchResult);
  }

  return normalized;
}
|
|
175
|
+
|
|
176
|
+
/**
 * Derives a short snippet (truncated to 300 via `truncateSnippet`) for an Exa entry.
 *
 * Preference order: the entry's `text` when it has non-whitespace content,
 * otherwise the first non-blank string in `highlights`, otherwise `""`.
 */
function extractSnippet(entry: ExaResult): string {
  const { text, highlights } = entry;

  if (typeof text === "string" && text.trim()) {
    return truncateSnippet(text, 300);
  }

  if (!Array.isArray(highlights)) {
    return "";
  }

  const firstUsable = highlights.find(
    (highlight): highlight is string => typeof highlight === "string" && highlight.trim() !== "",
  );
  return firstUsable === undefined ? "" : truncateSnippet(firstUsable, 300);
}
|
|
190
|
+
|
|
191
|
+
/**
 * Produces the human-readable notes describing which optional features were
 * used for a request: freshness, domain filtering, and content extraction
 * (in that order). Returns an empty array when none apply.
 */
function buildNotes(args: SearchProviderArgs): string[] {
  const { freshness, domains, includeContent } = args;

  const candidates: Array<string | false> = [
    freshness ? `Exa applied freshness natively for "${freshness}" queries.` : false,
    domains?.length ? `Exa applied domain filtering natively for ${domains.join(", ")}.` : false,
    includeContent ? "Exa returned extracted page content for this request." : false,
  ];

  return candidates.filter((note): note is string => note !== false);
}
|
|
208
|
+
|
|
209
|
+
/**
 * Converts a freshness window into the ISO timestamp used as the lower bound
 * for a result's publication date.
 *
 * The window is counted back in whole days from today (day = 1, week = 7,
 * month = 30, year = 365) and then floored to 00:00:00.000 UTC.
 *
 * @returns The ISO-8601 string, or `undefined` when no freshness is requested.
 */
function freshnessToStartPublishedDate(freshness: SearchFreshness | undefined): string | undefined {
  if (!freshness) return undefined;

  const windowDays = { day: 1, week: 7, month: 30, year: 365 };
  const daysBack = windowDays[freshness];

  const today = new Date();
  // Date.UTC normalizes day underflow across month/year boundaries and yields
  // midnight UTC for the resulting date.
  const startOfWindow = new Date(
    Date.UTC(today.getUTCFullYear(), today.getUTCMonth(), today.getUTCDate() - daysBack),
  );
  return startOfWindow.toISOString();
}
|
|
224
|
+
|
|
225
|
+
/** Type guard: true for any non-null, non-array object. */
function isPlainObject(value: unknown): value is Record<string, unknown> {
  if (Array.isArray(value)) return false;
  return value !== null && typeof value === "object";
}
|
|
@@ -0,0 +1,67 @@
|
|
|
1
|
+
import type { FetchProvider } from "../types.js";
|
|
2
|
+
import { MAX_RESPONSE_BYTES, ProviderError, TIMEOUTS, fetchJson } from "../provider-utils.js";
|
|
3
|
+
|
|
4
|
+
// URL that scrape requests from this provider are POSTed to.
const FIRECRAWL_ENDPOINT = "https://api.firecrawl.dev/v2/scrape";
|
|
5
|
+
|
|
6
|
+
/**
 * Creates the Firecrawl page-fetch provider.
 *
 * Construction never fails: a missing or blank key is only reported when
 * `fetch` is actually called.
 *
 * @param apiKey - Firecrawl API key, if configured.
 * @returns A provider whose `fetch` requests the page as markdown and resolves
 *          to the text extracted by `validateFirecrawlResponse`.
 */
export function createFirecrawlProvider(apiKey?: string | null): FetchProvider {
  return {
    name: "firecrawl",
    async fetch(url: string, signal: AbortSignal): Promise<string> {
      const token = apiKey?.trim();
      if (!token) {
        throw new ProviderError({
          provider: "firecrawl",
          message: "firecrawl is not configured. Set FIRECRAWL_API_KEY to enable this provider.",
          transient: false,
        });
      }

      const requestBody = JSON.stringify({ url, formats: ["markdown"] });

      return await fetchJson("firecrawl", FIRECRAWL_ENDPOINT, {
        method: "POST",
        headers: {
          "Content-Type": "application/json",
          Accept: "application/json",
          Authorization: `Bearer ${token}`,
        },
        body: requestBody,
        signal,
        timeoutMs: TIMEOUTS.fetchMs,
        maxBytes: MAX_RESPONSE_BYTES.fetch,
        validate: validateFirecrawlResponse,
      });
    },
  };
}
|
|
41
|
+
|
|
42
|
+
/**
 * Extracts the page text from a Firecrawl scrape response.
 *
 * Looks inside `value.data` and prefers `markdown`, falling back to `content`;
 * a field counts only if it is a string with non-whitespace content.
 *
 * @returns The trimmed text.
 * @throws Error when the response or `data` is not a plain object, or neither
 *         field holds usable text.
 */
function validateFirecrawlResponse(value: unknown): string {
  const data = isPlainObject(value) ? value.data : undefined;

  if (isPlainObject(data)) {
    for (const field of ["markdown", "content"] as const) {
      const candidate = data[field];
      if (typeof candidate === "string" && candidate.trim()) {
        return candidate.trim();
      }
    }
  }

  throw new Error("Firecrawl returned unexpected response shape.");
}
|
|
64
|
+
|
|
65
|
+
/** Type guard: true for any non-null object that is not an array. */
function isPlainObject(value: unknown): value is Record<string, unknown> {
  const isNonNullObject = typeof value === "object" && value !== null;
  return isNonNullObject && !Array.isArray(value);
}
|
|
@@ -0,0 +1,38 @@
|
|
|
1
|
+
import { createBraveProvider } from "./brave.js";
|
|
2
|
+
import { createExaProvider } from "./exa.js";
|
|
3
|
+
import { createFirecrawlProvider } from "./firecrawl.js";
|
|
4
|
+
import { createJinaProvider } from "./jina.js";
|
|
5
|
+
import createSerperProviderFactory from "./serper.js";
|
|
6
|
+
import { createTavilyProvider } from "./tavily.js";
|
|
7
|
+
import type { InitializedProviders, LoadedConfig } from "../types.js";
|
|
8
|
+
|
|
9
|
+
/**
 * Instantiates every provider for which the loaded configuration holds an API key.
 *
 * Search providers (Brave, Serper, Tavily, Exa) are created only when their
 * key is set. The Jina fetch provider is always created, with or without a
 * key; Firecrawl is added only when its key is set.
 *
 * @param config - Loaded configuration containing the `apiKeys` map.
 * @returns The search and fetch provider maps plus `hasAnySearchProvider`,
 *          which is true when at least one search provider was created.
 */
export function initProviders(config: LoadedConfig): InitializedProviders {
  const { apiKeys } = config;

  const search: InitializedProviders["search"] = {};
  if (apiKeys.BRAVE_API_KEY) search.brave = createBraveProvider(apiKeys.BRAVE_API_KEY);
  if (apiKeys.SERPER_API_KEY) search.serper = createSerperProviderFactory(apiKeys.SERPER_API_KEY);
  if (apiKeys.TAVILY_API_KEY) search.tavily = createTavilyProvider(apiKeys.TAVILY_API_KEY);
  if (apiKeys.EXA_API_KEY) search.exa = createExaProvider(apiKeys.EXA_API_KEY);

  const fetch: InitializedProviders["fetch"] = {
    jina: createJinaProvider(apiKeys.JINA_API_KEY),
  };
  if (apiKeys.FIRECRAWL_API_KEY) {
    fetch.firecrawl = createFirecrawlProvider(apiKeys.FIRECRAWL_API_KEY);
  }

  const hasAnySearchProvider = Object.keys(search).length > 0;
  return { search, fetch, hasAnySearchProvider };
}
|