pi-web-providers 3.2.0 → 3.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (3)
  1. package/README.md +8 -2
  2. package/dist/index.js +180 -2
  3. package/package.json +1 -1
package/README.md CHANGED
@@ -66,7 +66,7 @@ reported when the tool is actually called.
66
66
  | **Brave** | ✔ | | ✔ | ✔ | `BRAVE_SEARCH_API_KEY` / `BRAVE_ANSWERS_API_KEY` |
67
67
  | **Cloudflare** | | ✔ | | | `CLOUDFLARE_API_TOKEN` + `CLOUDFLARE_ACCOUNT_ID` |
68
68
  | **Exa** | ✔ | ✔ | ✔ | ✔ | `EXA_API_KEY` |
69
- | **Firecrawl** | ✔ | ✔ | | | `FIRECRAWL_API_KEY` |
69
+ | **Firecrawl** | ✔ | ✔ | ✔ | | `FIRECRAWL_API_KEY` |
70
70
  | **Gemini** | ✔ | | ✔ | ✔ | `GOOGLE_API_KEY` |
71
71
  | **Linkup** | ✔ | ✔ | | ✔ | `LINKUP_API_KEY` |
72
72
  | **Ollama** | ✔ | ✔ | | | `OLLAMA_API_KEY` |
@@ -315,13 +315,19 @@ scope, or account ID is usually wrong.
315
315
  <summary><strong>Firecrawl</strong></summary>
316
316
 
317
317
  - SDK: `@mendable/firecrawl-js`
318
- - Supports `web_search` and `web_contents`
318
+ - Supports `web_search`, `web_contents`, and page-scoped `web_answer`
319
319
  - Search can optionally include Firecrawl scrape-backed result enrichment
320
320
  - Contents extraction uses Firecrawl scrape with markdown-first defaults
321
+ - Answers use Firecrawl scrape's `question` format against one explicit page URL;
322
+ set `options.url` in the `web_answer` call or
323
+ `providers.firecrawl.options.answer.url` as a default
321
324
  - Exposes search options such as `lang`, `country`, `sources`, `categories`,
322
325
  `location`, `timeout`, and `scrapeOptions`
323
326
  - Exposes contents options such as `formats`, `onlyMainContent`, `includeTags`,
324
327
  `excludeTags`, `waitFor`, `headers`, `location`, `mobile`, and `proxy`
328
+ - Exposes answer options `url`, `onlyMainContent`, `includeTags`,
329
+ `excludeTags`, `waitFor`, `headers`, `location`, `mobile`, and `proxy`
330
+ - Firecrawl charges 5 credits per page for the `question` format
325
331
  - Optional `baseUrl` overrides are supported for self-hosted Firecrawl
326
332
  instances, proxies, and testing. API keys are required for Firecrawl Cloud,
327
333
  but can be omitted for self-hosted endpoints that do not enforce
package/dist/index.js CHANGED
@@ -3097,6 +3097,8 @@ var exaProvider = defineProvider({
3097
3097
  import FirecrawlClient from "@mendable/firecrawl-js";
3098
3098
  import { Type as Type7 } from "typebox";
3099
3099
  var FIRECRAWL_CLOUD_HOST = "api.firecrawl.dev";
3100
+ var FIRECRAWL_DEFAULT_API_URL = "https://api.firecrawl.dev";
3101
+ var FIRECRAWL_QUESTION_LIMIT = 1e4;
3100
3102
  var firecrawlSearchOptionsSchema = Type7.Object(
3101
3103
  {
3102
3104
  lang: Type7.Optional(
@@ -3203,6 +3205,55 @@ var firecrawlScrapeOptionsSchema = Type7.Object(
3203
3205
  },
3204
3206
  { description: "Firecrawl scrape options." }
3205
3207
  );
3208
+ var firecrawlAnswerOptionsSchema = Type7.Object(
3209
+ {
3210
+ url: Type7.String({
3211
+ minLength: 1,
3212
+ description: "URL of the page to ask about."
3213
+ }),
3214
+ onlyMainContent: Type7.Optional(
3215
+ Type7.Boolean({ description: "Extract only the main content." })
3216
+ ),
3217
+ includeTags: Type7.Optional(
3218
+ Type7.Array(Type7.String(), { description: "CSS selectors to include." })
3219
+ ),
3220
+ excludeTags: Type7.Optional(
3221
+ Type7.Array(Type7.String(), { description: "CSS selectors to exclude." })
3222
+ ),
3223
+ waitFor: Type7.Optional(
3224
+ Type7.Integer({
3225
+ minimum: 0,
3226
+ description: "Milliseconds to wait before scraping."
3227
+ })
3228
+ ),
3229
+ headers: Type7.Optional(
3230
+ Type7.Record(Type7.String(), Type7.String(), {
3231
+ description: "Headers to send when scraping."
3232
+ })
3233
+ ),
3234
+ location: Type7.Optional(
3235
+ Type7.Object(
3236
+ {
3237
+ country: Type7.Optional(Type7.String({ description: "Country hint." })),
3238
+ region: Type7.Optional(Type7.String({ description: "Region hint." })),
3239
+ city: Type7.Optional(Type7.String({ description: "City hint." }))
3240
+ },
3241
+ { description: "Location hint for scraping." }
3242
+ )
3243
+ ),
3244
+ mobile: Type7.Optional(
3245
+ Type7.Boolean({ description: "Use a mobile browser profile." })
3246
+ ),
3247
+ proxy: Type7.Optional(
3248
+ Type7.String({
3249
+ description: "Proxy mode passed through to Firecrawl."
3250
+ })
3251
+ )
3252
+ },
3253
+ {
3254
+ description: "Firecrawl page-question options. The URL is required; the question comes from the web_answer query."
3255
+ }
3256
+ );
3206
3257
  var firecrawlImplementation = {
3207
3258
  id: "firecrawl",
3208
3259
  label: "Firecrawl",
@@ -3213,6 +3264,8 @@ var firecrawlImplementation = {
3213
3264
  return firecrawlSearchOptionsSchema;
3214
3265
  case "contents":
3215
3266
  return firecrawlScrapeOptionsSchema;
3267
+ case "answer":
3268
+ return firecrawlAnswerOptionsSchema;
3216
3269
  default:
3217
3270
  return void 0;
3218
3271
  }
@@ -3279,6 +3332,34 @@ var firecrawlImplementation = {
3279
3332
  })
3280
3333
  )
3281
3334
  };
3335
+ },
3336
+ async answer(query2, config, _context, options) {
3337
+ const question = validateQuestion(query2);
3338
+ const defaults = asJsonObject(config.options?.scrape);
3339
+ const answerDefaults = asJsonObject(config.options?.answer);
3340
+ const mergedOptions = {
3341
+ onlyMainContent: true,
3342
+ ...defaults,
3343
+ ...answerDefaults,
3344
+ ...options ?? {}
3345
+ };
3346
+ const url2 = validateUrl(mergedOptions.url);
3347
+ const scrapeOptions = stripAnswerOnlyOptions(mergedOptions);
3348
+ const response = await scrapeQuestion(config, url2, question, scrapeOptions);
3349
+ const document = getFirecrawlDocument(response);
3350
+ const answer = readString2(document.answer);
3351
+ if (!answer?.trim()) {
3352
+ throw new Error("No answer returned for this URL.");
3353
+ }
3354
+ return {
3355
+ provider: firecrawlImplementation.id,
3356
+ text: answer.trim(),
3357
+ itemCount: 1,
3358
+ metadata: {
3359
+ url: url2,
3360
+ ...asRecord(document.metadata) ? { metadata: document.metadata } : {}
3361
+ }
3362
+ };
3282
3363
  }
3283
3364
  };
3284
3365
  function createClient3(config) {
@@ -3302,6 +3383,88 @@ function getFirecrawlCapabilityStatus(config, options) {
3302
3383
  function isFirecrawlCloudApiUrl(apiUrl) {
3303
3384
  return !apiUrl || apiUrl.includes(FIRECRAWL_CLOUD_HOST);
3304
3385
  }
3386
+ function validateQuestion(query2) {
3387
+ const question = query2.trim();
3388
+ if (!question) {
3389
+ throw new Error("question must be a non-empty string.");
3390
+ }
3391
+ if (question.length > FIRECRAWL_QUESTION_LIMIT) {
3392
+ throw new Error(
3393
+ `Firecrawl question must be at most ${FIRECRAWL_QUESTION_LIMIT} characters.`
3394
+ );
3395
+ }
3396
+ return question;
3397
+ }
3398
+ function validateUrl(value) {
3399
+ if (typeof value !== "string" || !value.trim()) {
3400
+ throw new Error("Firecrawl answer requires options.url.");
3401
+ }
3402
+ return value.trim();
3403
+ }
3404
+ function stripAnswerOnlyOptions(options) {
3405
+ const { url: _url, formats: _formats, ...scrapeOptions } = options;
3406
+ return scrapeOptions;
3407
+ }
3408
+ async function scrapeQuestion(config, url2, question, options) {
3409
+ const apiUrl = resolveConfigValue(config.baseUrl) ?? FIRECRAWL_DEFAULT_API_URL;
3410
+ const apiKey = resolveConfigValue(config.credentials?.api);
3411
+ if (isFirecrawlCloudApiUrl(apiUrl) && !apiKey) {
3412
+ throw new Error("is missing an API key");
3413
+ }
3414
+ const response = await fetch(joinUrl(apiUrl, "/v2/scrape"), {
3415
+ method: "POST",
3416
+ headers: {
3417
+ "Content-Type": "application/json",
3418
+ ...apiKey ? { Authorization: `Bearer ${apiKey}` } : {}
3419
+ },
3420
+ body: JSON.stringify({
3421
+ ...options,
3422
+ url: url2,
3423
+ formats: [{ type: "question", question }]
3424
+ })
3425
+ });
3426
+ const payload = await readJsonResponse(response);
3427
+ if (!response.ok) {
3428
+ throw new Error(readFirecrawlError(payload, response.statusText));
3429
+ }
3430
+ if (isFirecrawlFailure(payload)) {
3431
+ throw new Error(readFirecrawlError(payload, "Firecrawl scrape failed."));
3432
+ }
3433
+ return payload;
3434
+ }
3435
+ function joinUrl(baseUrl, path) {
3436
+ return `${baseUrl.replace(/\/+$/g, "")}/${path.replace(/^\/+/g, "")}`;
3437
+ }
3438
+ async function readJsonResponse(response) {
3439
+ const text = await response.text();
3440
+ if (!text) {
3441
+ return {};
3442
+ }
3443
+ try {
3444
+ return JSON.parse(text);
3445
+ } catch {
3446
+ return text;
3447
+ }
3448
+ }
3449
+ function isFirecrawlFailure(value) {
3450
+ const record = asRecord(value);
3451
+ return record?.success === false || record?.error !== void 0;
3452
+ }
3453
+ function readFirecrawlError(value, fallback) {
3454
+ const record = asRecord(value);
3455
+ return readString2(record?.error) ?? readString2(record?.message) ?? (typeof value === "string" ? value : void 0) ?? fallback;
3456
+ }
3457
+ function getFirecrawlDocument(value) {
3458
+ const record = asRecord(value);
3459
+ const data = asRecord(record?.data);
3460
+ if (data) {
3461
+ return data;
3462
+ }
3463
+ if (record) {
3464
+ return record;
3465
+ }
3466
+ throw new Error(`Unexpected Firecrawl response: ${formatJson(value)}`);
3467
+ }
3305
3468
  function flattenSearchResults(response) {
3306
3469
  return ["web", "news", "images"].flatMap(
3307
3470
  (source) => (response[source] ?? []).map((entry) => toSearchResult(source, entry)).filter((entry) => entry !== null)
@@ -3389,6 +3552,21 @@ var firecrawlProvider = defineProvider({
3389
3552
  input.options
3390
3553
  );
3391
3554
  }
3555
+ }),
3556
+ answer: defineCapability({
3557
+ options: firecrawlImplementation.getToolOptionsSchema?.("answer"),
3558
+ promptGuidelines: [
3559
+ "Firecrawl web_answer is page-scoped: set options.url to the specific page URL to ask about.",
3560
+ "Do not use Firecrawl web_answer for general multi-source answers; use web_search plus web_contents or web_research instead."
3561
+ ],
3562
+ async execute(input, ctx) {
3563
+ return await firecrawlImplementation.answer(
3564
+ input.query,
3565
+ ctx.config,
3566
+ ctx,
3567
+ input.options
3568
+ );
3569
+ }
3392
3570
  })
3393
3571
  }
3394
3572
  });
@@ -6040,7 +6218,7 @@ var serperImplementation = {
6040
6218
  requestOptions
6041
6219
  );
6042
6220
  const response = await fetch(
6043
- joinUrl(resolveConfigValue(config.baseUrl), requestOptions.mode),
6221
+ joinUrl2(resolveConfigValue(config.baseUrl), requestOptions.mode),
6044
6222
  {
6045
6223
  method: "POST",
6046
6224
  headers: {
@@ -6075,7 +6253,7 @@ var serperImplementation = {
6075
6253
  };
6076
6254
  }
6077
6255
  };
6078
- function joinUrl(baseUrl, mode = "search") {
6256
+ function joinUrl2(baseUrl, mode = "search") {
6079
6257
  const base2 = (baseUrl ?? DEFAULT_BASE_URL3).replace(/\/+$/, "");
6080
6258
  if (mode === "webpage" && base2 === DEFAULT_BASE_URL3) {
6081
6259
  return DEFAULT_SCRAPE_URL;
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "pi-web-providers",
3
- "version": "3.2.0",
3
+ "version": "3.3.0",
4
4
  "description": "Configurable web access extension for pi with per-tool provider routing and explicit provider option schemas for search, contents, quick grounded answers, and research.",
5
5
  "type": "module",
6
6
  "files": [