pi-web-providers 3.2.0 → 3.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +8 -2
- package/dist/index.js +180 -2
- package/package.json +1 -1
package/README.md
CHANGED
|
@@ -66,7 +66,7 @@ reported when the tool is actually called.
|
|
|
66
66
|
| **Brave** | ✔ | | ✔ | ✔ | `BRAVE_SEARCH_API_KEY` / `BRAVE_ANSWERS_API_KEY` |
|
|
67
67
|
| **Cloudflare** | | ✔ | | | `CLOUDFLARE_API_TOKEN` + `CLOUDFLARE_ACCOUNT_ID` |
|
|
68
68
|
| **Exa** | ✔ | ✔ | ✔ | ✔ | `EXA_API_KEY` |
|
|
69
|
-
| **Firecrawl** | ✔ | ✔ | | | `FIRECRAWL_API_KEY` |
|
|
69
|
+
| **Firecrawl** | ✔ | ✔ | ✔ | | `FIRECRAWL_API_KEY` |
|
|
70
70
|
| **Gemini** | ✔ | | ✔ | ✔ | `GOOGLE_API_KEY` |
|
|
71
71
|
| **Linkup** | ✔ | ✔ | | ✔ | `LINKUP_API_KEY` |
|
|
72
72
|
| **Ollama** | ✔ | ✔ | | | `OLLAMA_API_KEY` |
|
|
@@ -315,13 +315,19 @@ scope, or account ID is usually wrong.
|
|
|
315
315
|
<summary><strong>Firecrawl</strong></summary>
|
|
316
316
|
|
|
317
317
|
- SDK: `@mendable/firecrawl-js`
|
|
318
|
-
- Supports `web_search` and `web_contents`
|
|
318
|
+
- Supports `web_search`, `web_contents`, and page-scoped `web_answer`
|
|
319
319
|
- Search can optionally include Firecrawl scrape-backed result enrichment
|
|
320
320
|
- Contents extraction uses Firecrawl scrape with markdown-first defaults
|
|
321
|
+
- Answers use Firecrawl scrape's `question` format against one explicit page URL;
|
|
322
|
+
set `options.url` in the `web_answer` call or
|
|
323
|
+
`providers.firecrawl.options.answer.url` as a default
|
|
321
324
|
- Exposes search options such as `lang`, `country`, `sources`, `categories`,
|
|
322
325
|
`location`, `timeout`, and `scrapeOptions`
|
|
323
326
|
- Exposes contents options such as `formats`, `onlyMainContent`, `includeTags`,
|
|
324
327
|
`excludeTags`, `waitFor`, `headers`, `location`, `mobile`, and `proxy`
|
|
328
|
+
- Exposes answer options `url`, `onlyMainContent`, `includeTags`,
|
|
329
|
+
`excludeTags`, `waitFor`, `headers`, `location`, `mobile`, and `proxy`
|
|
330
|
+
- Firecrawl charges 5 credits per page for the `question` format
|
|
325
331
|
- Optional `baseUrl` overrides are supported for self-hosted Firecrawl
|
|
326
332
|
instances, proxies, and testing. API keys are required for Firecrawl Cloud,
|
|
327
333
|
but can be omitted for self-hosted endpoints that do not enforce
|
package/dist/index.js
CHANGED
|
@@ -3097,6 +3097,8 @@ var exaProvider = defineProvider({
|
|
|
3097
3097
|
import FirecrawlClient from "@mendable/firecrawl-js";
|
|
3098
3098
|
import { Type as Type7 } from "typebox";
|
|
3099
3099
|
var FIRECRAWL_CLOUD_HOST = "api.firecrawl.dev";
|
|
3100
|
+
var FIRECRAWL_DEFAULT_API_URL = "https://api.firecrawl.dev";
|
|
3101
|
+
var FIRECRAWL_QUESTION_LIMIT = 1e4;
|
|
3100
3102
|
var firecrawlSearchOptionsSchema = Type7.Object(
|
|
3101
3103
|
{
|
|
3102
3104
|
lang: Type7.Optional(
|
|
@@ -3203,6 +3205,55 @@ var firecrawlScrapeOptionsSchema = Type7.Object(
|
|
|
3203
3205
|
},
|
|
3204
3206
|
{ description: "Firecrawl scrape options." }
|
|
3205
3207
|
);
|
|
3208
|
+
var firecrawlAnswerOptionsSchema = Type7.Object(
|
|
3209
|
+
{
|
|
3210
|
+
url: Type7.String({
|
|
3211
|
+
minLength: 1,
|
|
3212
|
+
description: "URL of the page to ask about."
|
|
3213
|
+
}),
|
|
3214
|
+
onlyMainContent: Type7.Optional(
|
|
3215
|
+
Type7.Boolean({ description: "Extract only the main content." })
|
|
3216
|
+
),
|
|
3217
|
+
includeTags: Type7.Optional(
|
|
3218
|
+
Type7.Array(Type7.String(), { description: "CSS selectors to include." })
|
|
3219
|
+
),
|
|
3220
|
+
excludeTags: Type7.Optional(
|
|
3221
|
+
Type7.Array(Type7.String(), { description: "CSS selectors to exclude." })
|
|
3222
|
+
),
|
|
3223
|
+
waitFor: Type7.Optional(
|
|
3224
|
+
Type7.Integer({
|
|
3225
|
+
minimum: 0,
|
|
3226
|
+
description: "Milliseconds to wait before scraping."
|
|
3227
|
+
})
|
|
3228
|
+
),
|
|
3229
|
+
headers: Type7.Optional(
|
|
3230
|
+
Type7.Record(Type7.String(), Type7.String(), {
|
|
3231
|
+
description: "Headers to send when scraping."
|
|
3232
|
+
})
|
|
3233
|
+
),
|
|
3234
|
+
location: Type7.Optional(
|
|
3235
|
+
Type7.Object(
|
|
3236
|
+
{
|
|
3237
|
+
country: Type7.Optional(Type7.String({ description: "Country hint." })),
|
|
3238
|
+
region: Type7.Optional(Type7.String({ description: "Region hint." })),
|
|
3239
|
+
city: Type7.Optional(Type7.String({ description: "City hint." }))
|
|
3240
|
+
},
|
|
3241
|
+
{ description: "Location hint for scraping." }
|
|
3242
|
+
)
|
|
3243
|
+
),
|
|
3244
|
+
mobile: Type7.Optional(
|
|
3245
|
+
Type7.Boolean({ description: "Use a mobile browser profile." })
|
|
3246
|
+
),
|
|
3247
|
+
proxy: Type7.Optional(
|
|
3248
|
+
Type7.String({
|
|
3249
|
+
description: "Proxy mode passed through to Firecrawl."
|
|
3250
|
+
})
|
|
3251
|
+
)
|
|
3252
|
+
},
|
|
3253
|
+
{
|
|
3254
|
+
description: "Firecrawl page-question options. The URL is required; the question comes from the web_answer query."
|
|
3255
|
+
}
|
|
3256
|
+
);
|
|
3206
3257
|
var firecrawlImplementation = {
|
|
3207
3258
|
id: "firecrawl",
|
|
3208
3259
|
label: "Firecrawl",
|
|
@@ -3213,6 +3264,8 @@ var firecrawlImplementation = {
|
|
|
3213
3264
|
return firecrawlSearchOptionsSchema;
|
|
3214
3265
|
case "contents":
|
|
3215
3266
|
return firecrawlScrapeOptionsSchema;
|
|
3267
|
+
case "answer":
|
|
3268
|
+
return firecrawlAnswerOptionsSchema;
|
|
3216
3269
|
default:
|
|
3217
3270
|
return void 0;
|
|
3218
3271
|
}
|
|
@@ -3279,6 +3332,34 @@ var firecrawlImplementation = {
|
|
|
3279
3332
|
})
|
|
3280
3333
|
)
|
|
3281
3334
|
};
|
|
3335
|
+
},
|
|
3336
|
+
async answer(query2, config, _context, options) {
|
|
3337
|
+
const question = validateQuestion(query2);
|
|
3338
|
+
const defaults = asJsonObject(config.options?.scrape);
|
|
3339
|
+
const answerDefaults = asJsonObject(config.options?.answer);
|
|
3340
|
+
const mergedOptions = {
|
|
3341
|
+
onlyMainContent: true,
|
|
3342
|
+
...defaults,
|
|
3343
|
+
...answerDefaults,
|
|
3344
|
+
...options ?? {}
|
|
3345
|
+
};
|
|
3346
|
+
const url2 = validateUrl(mergedOptions.url);
|
|
3347
|
+
const scrapeOptions = stripAnswerOnlyOptions(mergedOptions);
|
|
3348
|
+
const response = await scrapeQuestion(config, url2, question, scrapeOptions);
|
|
3349
|
+
const document = getFirecrawlDocument(response);
|
|
3350
|
+
const answer = readString2(document.answer);
|
|
3351
|
+
if (!answer?.trim()) {
|
|
3352
|
+
throw new Error("No answer returned for this URL.");
|
|
3353
|
+
}
|
|
3354
|
+
return {
|
|
3355
|
+
provider: firecrawlImplementation.id,
|
|
3356
|
+
text: answer.trim(),
|
|
3357
|
+
itemCount: 1,
|
|
3358
|
+
metadata: {
|
|
3359
|
+
url: url2,
|
|
3360
|
+
...asRecord(document.metadata) ? { metadata: document.metadata } : {}
|
|
3361
|
+
}
|
|
3362
|
+
};
|
|
3282
3363
|
}
|
|
3283
3364
|
};
|
|
3284
3365
|
function createClient3(config) {
|
|
@@ -3302,6 +3383,88 @@ function getFirecrawlCapabilityStatus(config, options) {
|
|
|
3302
3383
|
function isFirecrawlCloudApiUrl(apiUrl) {
|
|
3303
3384
|
return !apiUrl || apiUrl.includes(FIRECRAWL_CLOUD_HOST);
|
|
3304
3385
|
}
|
|
3386
|
+
function validateQuestion(query2) {
|
|
3387
|
+
const question = query2.trim();
|
|
3388
|
+
if (!question) {
|
|
3389
|
+
throw new Error("question must be a non-empty string.");
|
|
3390
|
+
}
|
|
3391
|
+
if (question.length > FIRECRAWL_QUESTION_LIMIT) {
|
|
3392
|
+
throw new Error(
|
|
3393
|
+
`Firecrawl question must be at most ${FIRECRAWL_QUESTION_LIMIT} characters.`
|
|
3394
|
+
);
|
|
3395
|
+
}
|
|
3396
|
+
return question;
|
|
3397
|
+
}
|
|
3398
|
+
function validateUrl(value) {
|
|
3399
|
+
if (typeof value !== "string" || !value.trim()) {
|
|
3400
|
+
throw new Error("Firecrawl answer requires options.url.");
|
|
3401
|
+
}
|
|
3402
|
+
return value.trim();
|
|
3403
|
+
}
|
|
3404
|
+
function stripAnswerOnlyOptions(options) {
|
|
3405
|
+
const { url: _url, formats: _formats, ...scrapeOptions } = options;
|
|
3406
|
+
return scrapeOptions;
|
|
3407
|
+
}
|
|
3408
|
+
async function scrapeQuestion(config, url2, question, options) {
|
|
3409
|
+
const apiUrl = resolveConfigValue(config.baseUrl) ?? FIRECRAWL_DEFAULT_API_URL;
|
|
3410
|
+
const apiKey = resolveConfigValue(config.credentials?.api);
|
|
3411
|
+
if (isFirecrawlCloudApiUrl(apiUrl) && !apiKey) {
|
|
3412
|
+
throw new Error("is missing an API key");
|
|
3413
|
+
}
|
|
3414
|
+
const response = await fetch(joinUrl(apiUrl, "/v2/scrape"), {
|
|
3415
|
+
method: "POST",
|
|
3416
|
+
headers: {
|
|
3417
|
+
"Content-Type": "application/json",
|
|
3418
|
+
...apiKey ? { Authorization: `Bearer ${apiKey}` } : {}
|
|
3419
|
+
},
|
|
3420
|
+
body: JSON.stringify({
|
|
3421
|
+
...options,
|
|
3422
|
+
url: url2,
|
|
3423
|
+
formats: [{ type: "question", question }]
|
|
3424
|
+
})
|
|
3425
|
+
});
|
|
3426
|
+
const payload = await readJsonResponse(response);
|
|
3427
|
+
if (!response.ok) {
|
|
3428
|
+
throw new Error(readFirecrawlError(payload, response.statusText));
|
|
3429
|
+
}
|
|
3430
|
+
if (isFirecrawlFailure(payload)) {
|
|
3431
|
+
throw new Error(readFirecrawlError(payload, "Firecrawl scrape failed."));
|
|
3432
|
+
}
|
|
3433
|
+
return payload;
|
|
3434
|
+
}
|
|
3435
|
+
function joinUrl(baseUrl, path) {
|
|
3436
|
+
return `${baseUrl.replace(/\/+$/g, "")}/${path.replace(/^\/+/g, "")}`;
|
|
3437
|
+
}
|
|
3438
|
+
async function readJsonResponse(response) {
|
|
3439
|
+
const text = await response.text();
|
|
3440
|
+
if (!text) {
|
|
3441
|
+
return {};
|
|
3442
|
+
}
|
|
3443
|
+
try {
|
|
3444
|
+
return JSON.parse(text);
|
|
3445
|
+
} catch {
|
|
3446
|
+
return text;
|
|
3447
|
+
}
|
|
3448
|
+
}
|
|
3449
|
+
function isFirecrawlFailure(value) {
|
|
3450
|
+
const record = asRecord(value);
|
|
3451
|
+
return record?.success === false || record?.error !== void 0;
|
|
3452
|
+
}
|
|
3453
|
+
function readFirecrawlError(value, fallback) {
|
|
3454
|
+
const record = asRecord(value);
|
|
3455
|
+
return readString2(record?.error) ?? readString2(record?.message) ?? (typeof value === "string" ? value : void 0) ?? fallback;
|
|
3456
|
+
}
|
|
3457
|
+
function getFirecrawlDocument(value) {
|
|
3458
|
+
const record = asRecord(value);
|
|
3459
|
+
const data = asRecord(record?.data);
|
|
3460
|
+
if (data) {
|
|
3461
|
+
return data;
|
|
3462
|
+
}
|
|
3463
|
+
if (record) {
|
|
3464
|
+
return record;
|
|
3465
|
+
}
|
|
3466
|
+
throw new Error(`Unexpected Firecrawl response: ${formatJson(value)}`);
|
|
3467
|
+
}
|
|
3305
3468
|
function flattenSearchResults(response) {
|
|
3306
3469
|
return ["web", "news", "images"].flatMap(
|
|
3307
3470
|
(source) => (response[source] ?? []).map((entry) => toSearchResult(source, entry)).filter((entry) => entry !== null)
|
|
@@ -3389,6 +3552,21 @@ var firecrawlProvider = defineProvider({
|
|
|
3389
3552
|
input.options
|
|
3390
3553
|
);
|
|
3391
3554
|
}
|
|
3555
|
+
}),
|
|
3556
|
+
answer: defineCapability({
|
|
3557
|
+
options: firecrawlImplementation.getToolOptionsSchema?.("answer"),
|
|
3558
|
+
promptGuidelines: [
|
|
3559
|
+
"Firecrawl web_answer is page-scoped: set options.url to the specific page URL to ask about.",
|
|
3560
|
+
"Do not use Firecrawl web_answer for general multi-source answers; use web_search plus web_contents or web_research instead."
|
|
3561
|
+
],
|
|
3562
|
+
async execute(input, ctx) {
|
|
3563
|
+
return await firecrawlImplementation.answer(
|
|
3564
|
+
input.query,
|
|
3565
|
+
ctx.config,
|
|
3566
|
+
ctx,
|
|
3567
|
+
input.options
|
|
3568
|
+
);
|
|
3569
|
+
}
|
|
3392
3570
|
})
|
|
3393
3571
|
}
|
|
3394
3572
|
});
|
|
@@ -6040,7 +6218,7 @@ var serperImplementation = {
|
|
|
6040
6218
|
requestOptions
|
|
6041
6219
|
);
|
|
6042
6220
|
const response = await fetch(
|
|
6043
|
-
|
|
6221
|
+
joinUrl2(resolveConfigValue(config.baseUrl), requestOptions.mode),
|
|
6044
6222
|
{
|
|
6045
6223
|
method: "POST",
|
|
6046
6224
|
headers: {
|
|
@@ -6075,7 +6253,7 @@ var serperImplementation = {
|
|
|
6075
6253
|
};
|
|
6076
6254
|
}
|
|
6077
6255
|
};
|
|
6078
|
-
function joinUrl(baseUrl, mode = "search") {
|
|
6256
|
+
function joinUrl2(baseUrl, mode = "search") {
|
|
6079
6257
|
const base2 = (baseUrl ?? DEFAULT_BASE_URL3).replace(/\/+$/, "");
|
|
6080
6258
|
if (mode === "webpage" && base2 === DEFAULT_BASE_URL3) {
|
|
6081
6259
|
return DEFAULT_SCRAPE_URL;
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "pi-web-providers",
|
|
3
|
-
"version": "3.2.0",
|
|
3
|
+
"version": "3.3.0",
|
|
4
4
|
"description": "Configurable web access extension for pi with per-tool provider routing and explicit provider option schemas for search, contents, quick grounded answers, and research.",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"files": [
|