@poolzin/pool-bot 2026.2.21 → 2026.2.23
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +25 -0
- package/dist/agents/api-key-rotation.js +47 -0
- package/dist/agents/apply-patch-update.js +19 -9
- package/dist/agents/apply-patch.js +72 -47
- package/dist/agents/bash-tools.exec.js +141 -559
- package/dist/agents/cli-backends.js +49 -6
- package/dist/agents/cli-runner/helpers.js +69 -152
- package/dist/agents/cli-runner.js +70 -19
- package/dist/agents/identity.js +20 -1
- package/dist/agents/image-sanitization.js +9 -0
- package/dist/agents/live-auth-keys.js +123 -26
- package/dist/agents/live-model-filter.js +13 -4
- package/dist/agents/model-catalog.js +40 -9
- package/dist/agents/model-forward-compat.js +60 -23
- package/dist/agents/model-selection.js +134 -41
- package/dist/agents/pi-auth-json.js +2 -2
- package/dist/agents/pi-embedded-helpers/bootstrap.js +65 -15
- package/dist/agents/pi-embedded-helpers/errors.js +140 -15
- package/dist/agents/pi-embedded-helpers/images.js +22 -12
- package/dist/agents/pi-embedded-helpers.js +2 -2
- package/dist/agents/pi-embedded-runner/abort.js +10 -3
- package/dist/agents/pi-embedded-runner/compact.js +230 -32
- package/dist/agents/pi-embedded-runner/extra-params.js +203 -12
- package/dist/agents/pi-embedded-runner/google.js +109 -19
- package/dist/agents/pi-embedded-runner/history.js +35 -17
- package/dist/agents/pi-embedded-runner/run/attempt.js +386 -95
- package/dist/agents/pi-embedded-runner/run/images.js +81 -55
- package/dist/agents/pi-embedded-runner/run/payloads.js +89 -39
- package/dist/agents/pi-embedded-runner/run.js +193 -25
- package/dist/agents/pi-embedded-runner/run.overflow-compaction.mocks.shared.js +2 -2
- package/dist/agents/pi-embedded-runner/runs.js +17 -8
- package/dist/agents/pi-embedded-runner/tool-result-context-guard.js +262 -0
- package/dist/agents/pi-embedded-runner.js +1 -1
- package/dist/agents/pi-embedded-subscribe.handlers.tools.js +180 -10
- package/dist/agents/pi-embedded-subscribe.js +37 -0
- package/dist/agents/pi-embedded-subscribe.tools.js +127 -30
- package/dist/agents/pi-model-discovery.js +9 -2
- package/dist/agents/pi-tool-definition-adapter.js +60 -8
- package/dist/agents/pi-tools.before-tool-call.js +1 -1
- package/dist/agents/pi-tools.js +113 -94
- package/dist/agents/pi-tools.read.js +337 -38
- package/dist/agents/poolbot-tools.js +14 -5
- package/dist/agents/sandbox/docker.js +10 -5
- package/dist/agents/sandbox/registry.js +96 -46
- package/dist/agents/sandbox/sanitize-env-vars.js +82 -0
- package/dist/agents/sandbox-paths.js +43 -10
- package/dist/agents/session-tool-result-guard-wrapper.js +23 -11
- package/dist/agents/session-tool-result-guard.js +39 -39
- package/dist/agents/session-transcript-repair.js +36 -33
- package/dist/agents/session-write-lock.js +62 -44
- package/dist/agents/skills/frontmatter.js +49 -88
- package/dist/agents/skills/workspace.js +335 -28
- package/dist/agents/subagent-announce.js +508 -174
- package/dist/agents/subagent-registry.js +45 -4
- package/dist/agents/subagent-spawn.js +16 -33
- package/dist/agents/system-prompt-report.js +27 -10
- package/dist/agents/system-prompt.js +26 -32
- package/dist/agents/tool-call-id.js +69 -17
- package/dist/agents/tool-display-common.js +1 -1
- package/dist/agents/tool-images.js +64 -31
- package/dist/agents/tools/canvas-tool.js +17 -11
- package/dist/agents/tools/common.js +37 -19
- package/dist/agents/tools/cron-tool.js +40 -38
- package/dist/agents/tools/gateway.js +70 -2
- package/dist/agents/tools/message-tool.js +181 -40
- package/dist/agents/tools/nodes-tool.js +128 -36
- package/dist/agents/tools/nodes-utils.js +12 -38
- package/dist/agents/tools/session-status-tool.js +24 -71
- package/dist/agents/tools/sessions-helpers.js +38 -210
- package/dist/agents/tools/sessions-spawn-tool.js +28 -198
- package/dist/agents/tools/telegram-actions.js +58 -7
- package/dist/agents/tools/web-fetch-utils.js +112 -7
- package/dist/agents/tools/web-fetch.js +279 -175
- package/dist/agents/tools/web-shared.js +71 -8
- package/dist/agents/usage.js +25 -16
- package/dist/auto-reply/commands-registry.data.js +85 -11
- package/dist/auto-reply/dispatch.js +40 -21
- package/dist/auto-reply/reply/abort.js +102 -33
- package/dist/auto-reply/reply/commands-core.js +82 -33
- package/dist/auto-reply/reply/commands-export-session.js +1 -1
- package/dist/auto-reply/reply/commands-info.js +41 -12
- package/dist/auto-reply/reply/commands-subagents.js +352 -100
- package/dist/auto-reply/reply/commands-system-prompt.js +2 -2
- package/dist/auto-reply/reply/dispatch-from-config.js +100 -29
- package/dist/auto-reply/reply/elevated-unavailable.js +1 -1
- package/dist/auto-reply/reply/inbound-meta.js +12 -1
- package/dist/auto-reply/reply/mentions.js +18 -11
- package/dist/auto-reply/reply/normalize-reply.js +17 -8
- package/dist/auto-reply/reply/reply-dispatcher.js +62 -10
- package/dist/auto-reply/reply/session.js +102 -21
- package/dist/auto-reply/reply/streaming-directives.js +16 -5
- package/dist/auto-reply/status.js +73 -50
- package/dist/browser/extension-relay.js +3 -3
- package/dist/browser/http-auth.js +1 -1
- package/dist/browser/paths.js +2 -2
- package/dist/build-info.json +3 -3
- package/dist/channels/allowlist-match.js +20 -0
- package/dist/channels/allowlists/resolve-utils.js +65 -2
- package/dist/channels/chat-type.js +8 -4
- package/dist/channels/dock.js +127 -35
- package/dist/channels/draft-stream-loop.js +6 -2
- package/dist/channels/plugins/actions/telegram.js +42 -18
- package/dist/channels/plugins/allowlist-match.js +1 -1
- package/dist/channels/plugins/group-mentions.js +51 -41
- package/dist/channels/plugins/message-action-names.js +2 -0
- package/dist/channels/plugins/message-actions.js +24 -5
- package/dist/channels/plugins/normalize/discord.js +26 -4
- package/dist/channels/plugins/normalize/signal.js +35 -22
- package/dist/channels/plugins/onboarding/helpers.js +8 -26
- package/dist/channels/plugins/outbound/imessage.js +15 -14
- package/dist/channels/registry.js +20 -7
- package/dist/cli/acp-cli.js +7 -5
- package/dist/cli/browser-cli-extension.js +25 -12
- package/dist/cli/browser-cli-state.cookies-storage.js +25 -6
- package/dist/cli/browser-cli-state.js +101 -145
- package/dist/cli/command-options.js +28 -0
- package/dist/cli/completion-cli.js +6 -6
- package/dist/cli/cron-cli/register.cron-add.js +25 -1
- package/dist/cli/cron-cli/register.cron-edit.js +44 -0
- package/dist/cli/cron-cli/shared.js +7 -1
- package/dist/cli/daemon-cli/lifecycle-core.js +23 -21
- package/dist/cli/daemon-cli/lifecycle.js +23 -247
- package/dist/cli/daemon-cli/register-service-commands.js +25 -4
- package/dist/cli/daemon-cli.js +1 -0
- package/dist/cli/devices-cli.js +33 -20
- package/dist/cli/gateway-cli/register.js +37 -105
- package/dist/cli/gateway-cli/run.js +49 -11
- package/dist/cli/nodes-camera.js +59 -4
- package/dist/cli/nodes-cli/register.camera.js +27 -24
- package/dist/cli/nodes-cli/rpc.js +21 -38
- package/dist/cli/qr-cli.js +2 -2
- package/dist/cli/skills-cli.format.js +2 -2
- package/dist/cli/update-cli/progress.js +2 -2
- package/dist/cli/update-cli/restart-helper.js +28 -7
- package/dist/cli/update-cli/shared.js +7 -7
- package/dist/cli/update-cli/status.js +1 -1
- package/dist/cli/update-cli/update-command.js +14 -8
- package/dist/cli/update-cli/wizard.js +2 -2
- package/dist/cli/update-cli.js +21 -1027
- package/dist/commands/auth-choice.apply.anthropic.js +10 -2
- package/dist/commands/channels/add-mutators.js +3 -35
- package/dist/commands/channels/add.js +39 -51
- package/dist/commands/config-validation.js +1 -1
- package/dist/commands/configure.gateway-auth.js +52 -15
- package/dist/commands/configure.gateway.js +84 -40
- package/dist/commands/doctor-completion.js +3 -3
- package/dist/commands/doctor-config-flow.js +536 -16
- package/dist/commands/doctor-gateway-services.js +103 -79
- package/dist/commands/doctor-memory-search.js +9 -9
- package/dist/commands/doctor-platform-notes.js +57 -30
- package/dist/commands/doctor-prompter.js +26 -15
- package/dist/commands/doctor-session-locks.js +1 -1
- package/dist/commands/doctor.js +21 -9
- package/dist/commands/model-picker.js +120 -95
- package/dist/commands/models/set.js +2 -21
- package/dist/commands/models/shared.js +65 -37
- package/dist/commands/onboard-helpers.js +81 -39
- package/dist/commands/openai-codex-oauth.js +1 -1
- package/dist/commands/sessions.js +52 -53
- package/dist/commands/status.summary.js +52 -34
- package/dist/commands/test-wizard-helpers.js +2 -2
- package/dist/config/defaults.js +79 -42
- package/dist/config/group-policy.js +50 -18
- package/dist/config/includes.js +37 -10
- package/dist/config/schema.help.js +5 -4
- package/dist/config/schema.hints.js +2 -2
- package/dist/config/schema.labels.js +1 -0
- package/dist/config/sessions/group.js +12 -11
- package/dist/config/sessions/paths.js +137 -11
- package/dist/config/sessions/store.js +185 -65
- package/dist/config/sessions/types.js +15 -1
- package/dist/config/sessions.js +1 -0
- package/dist/config/telegram-custom-commands.js +3 -2
- package/dist/config/types.js +2 -0
- package/dist/config/zod-schema.agent-defaults.js +6 -27
- package/dist/config/zod-schema.agent-runtime.js +171 -79
- package/dist/config/zod-schema.providers-core.js +138 -65
- package/dist/config/zod-schema.session.js +49 -22
- package/dist/control-ui/assets/index-HRr1grwl.js.map +1 -1
- package/dist/cron/isolated-agent/run.js +224 -57
- package/dist/cron/normalize.js +48 -45
- package/dist/cron/run-log.js +14 -0
- package/dist/cron/service/jobs.js +190 -28
- package/dist/cron/service/normalize.js +29 -11
- package/dist/cron/service/store.js +30 -44
- package/dist/cron/service/timer.js +182 -96
- package/dist/cron/service.js +3 -0
- package/dist/cron/stagger.js +37 -0
- package/dist/daemon/inspect.js +132 -92
- package/dist/daemon/runtime-paths.js +25 -4
- package/dist/daemon/service-audit.js +47 -16
- package/dist/discord/accounts.js +23 -20
- package/dist/discord/monitor/agent-components.js +1115 -219
- package/dist/discord/monitor/allow-list.js +114 -34
- package/dist/discord/monitor/listeners.js +204 -97
- package/dist/discord/monitor/message-handler.js +21 -10
- package/dist/discord/monitor/message-handler.preflight.js +195 -101
- package/dist/discord/monitor/message-handler.process.js +384 -123
- package/dist/discord/monitor/message-utils.js +86 -23
- package/dist/discord/monitor/native-command.js +77 -57
- package/dist/discord/monitor/provider.js +122 -117
- package/dist/discord/monitor/reply-context.js +20 -16
- package/dist/discord/monitor/reply-delivery.js +40 -8
- package/dist/discord/monitor/rest-fetch.js +22 -0
- package/dist/discord/monitor/threading.js +117 -24
- package/dist/discord/send.js +2 -1
- package/dist/discord/send.outbound.js +124 -11
- package/dist/discord/send.shared.js +112 -72
- package/dist/discord/voice-message.js +3 -3
- package/dist/gateway/auth.js +119 -44
- package/dist/gateway/call.js +76 -34
- package/dist/gateway/channel-health-monitor.js +57 -50
- package/dist/gateway/client.js +63 -29
- package/dist/gateway/control-ui-contract.js +1 -1
- package/dist/gateway/gateway-config-prompts.shared.js +2 -2
- package/dist/gateway/net.js +109 -1
- package/dist/gateway/protocol/index.js +5 -8
- package/dist/gateway/protocol/schema/agent.js +19 -1
- package/dist/gateway/protocol/schema/channels.js +21 -0
- package/dist/gateway/protocol/schema/cron.js +43 -30
- package/dist/gateway/protocol/schema/protocol-schemas.js +6 -11
- package/dist/gateway/protocol/schema/sessions.js +5 -1
- package/dist/gateway/protocol/schema.js +0 -1
- package/dist/gateway/server/presence-events.js +12 -0
- package/dist/gateway/server/ws-connection/message-handler.js +203 -212
- package/dist/gateway/server/ws-connection.js +58 -21
- package/dist/gateway/server-broadcast.js +18 -13
- package/dist/gateway/server-cron.js +177 -10
- package/dist/gateway/server-methods/agent-job.js +131 -38
- package/dist/gateway/server-methods/send.js +60 -14
- package/dist/gateway/server-methods/sessions.js +160 -96
- package/dist/gateway/server-methods/system.js +5 -7
- package/dist/gateway/server-methods-list.js +8 -0
- package/dist/gateway/server-methods.js +24 -8
- package/dist/gateway/server-node-events.js +278 -68
- package/dist/gateway/session-utils.fs.js +316 -75
- package/dist/gateway/session-utils.js +224 -70
- package/dist/gateway/sessions-patch.js +63 -20
- package/dist/gateway/test-temp-config.js +1 -1
- package/dist/gateway/tools-invoke-http.js +118 -70
- package/dist/gateway/ws-log.js +135 -107
- package/dist/hooks/frontmatter.js +36 -82
- package/dist/hooks/install.js +149 -139
- package/dist/hooks/internal-hooks.js +29 -4
- package/dist/hooks/plugin-hooks.js +2 -1
- package/dist/imessage/monitor/deliver.js +10 -4
- package/dist/imessage/monitor/monitor-provider.js +138 -375
- package/dist/imessage/monitor/runtime.js +4 -8
- package/dist/imessage/send.js +65 -19
- package/dist/infra/exec-approvals-allowlist.js +7 -0
- package/dist/infra/exec-approvals.js +35 -920
- package/dist/infra/exec-safe-bin-trust.js +64 -0
- package/dist/infra/heartbeat-runner.js +207 -134
- package/dist/infra/heartbeat-wake.js +183 -22
- package/dist/infra/install-source-utils.js +47 -0
- package/dist/infra/net/ssrf.js +170 -36
- package/dist/infra/outbound/deliver.js +224 -58
- package/dist/infra/outbound/message-action-spec.js +12 -5
- package/dist/infra/outbound/outbound-session.js +27 -25
- package/dist/infra/poolbot-root.js +32 -22
- package/dist/infra/ports.js +14 -11
- package/dist/infra/skills-remote.js +48 -37
- package/dist/infra/system-events.js +25 -11
- package/dist/infra/system-presence.js +26 -33
- package/dist/infra/tmp-poolbot-dir.js +81 -2
- package/dist/infra/wsl.js +37 -1
- package/dist/line/bot-message-context.js +163 -191
- package/dist/logging/subsystem.js +59 -22
- package/dist/markdown/ir.js +124 -50
- package/dist/media/store.js +1 -1
- package/dist/media-understanding/runner.entries.js +42 -25
- package/dist/media-understanding/runner.js +53 -488
- package/dist/memory/embeddings-gemini.js +53 -38
- package/dist/memory/manager-embedding-ops.js +48 -69
- package/dist/pairing/pairing-store.js +178 -119
- package/dist/plugin-sdk/index.js +34 -6
- package/dist/plugins/hooks.js +135 -14
- package/dist/plugins/install.js +190 -152
- package/dist/polls.js +11 -0
- package/dist/routing/resolve-route.js +190 -56
- package/dist/routing/session-key.js +38 -22
- package/dist/runtime.js +35 -9
- package/dist/security/audit-channel.js +1 -1
- package/dist/sessions/session-key-utils.js +29 -11
- package/dist/shared/frontmatter.js +5 -5
- package/dist/shared/node-list-types.js +1 -0
- package/dist/shared/string-normalization.js +15 -0
- package/dist/signal/monitor/event-handler.js +68 -36
- package/dist/signal/send.js +29 -37
- package/dist/slack/monitor/allow-list.js +10 -11
- package/dist/slack/monitor/commands.js +14 -3
- package/dist/slack/monitor/events/interactions.js +4 -4
- package/dist/slack/monitor/media.js +224 -16
- package/dist/slack/monitor/message-handler/dispatch.js +247 -13
- package/dist/slack/monitor/message-handler/prepare.js +128 -45
- package/dist/slack/monitor/slash.js +357 -144
- package/dist/slack/streaming.js +77 -0
- package/dist/telegram/accounts.js +40 -13
- package/dist/telegram/allowed-updates.js +3 -0
- package/dist/telegram/bot/delivery.js +129 -66
- package/dist/telegram/bot/helpers.js +136 -122
- package/dist/telegram/bot-handlers.js +600 -339
- package/dist/telegram/bot-message-context.js +115 -73
- package/dist/telegram/bot-message-dispatch.js +235 -104
- package/dist/telegram/bot-native-command-menu.js +3 -1
- package/dist/telegram/bot-native-commands.js +213 -193
- package/dist/telegram/bot.js +24 -132
- package/dist/telegram/draft-stream.js +84 -75
- package/dist/telegram/format.js +150 -6
- package/dist/telegram/send.js +415 -255
- package/dist/telegram/targets.js +21 -2
- package/dist/telegram/update-offset-store.js +19 -3
- package/dist/terminal/restore.js +5 -2
- package/dist/test-utils/fetch-mock.js +5 -0
- package/dist/version.js +18 -5
- package/dist/web/auto-reply/monitor/broadcast.js +7 -3
- package/dist/web/auto-reply/monitor/on-message.js +6 -3
- package/dist/web/inbound/media.js +34 -8
- package/dist/web/inbound/monitor.js +34 -17
- package/dist/web/inbound/send-api.js +18 -17
- package/dist/web/outbound.js +12 -5
- package/dist/wizard/clack-prompter.js +40 -7
- package/extensions/bluebubbles/package.json +1 -1
- package/extensions/copilot-proxy/package.json +1 -1
- package/extensions/device-pair/index.ts +2 -2
- package/extensions/diagnostics-otel/package.json +1 -1
- package/extensions/discord/package.json +1 -1
- package/extensions/feishu/package.json +1 -1
- package/extensions/google-antigravity-auth/package.json +1 -1
- package/extensions/google-gemini-cli-auth/package.json +1 -1
- package/extensions/googlechat/package.json +1 -1
- package/extensions/imessage/package.json +1 -1
- package/extensions/irc/package.json +1 -1
- package/extensions/irc/src/accounts.ts +1 -1
- package/extensions/irc/src/onboarding.ts +4 -4
- package/extensions/line/package.json +1 -1
- package/extensions/llm-task/package.json +1 -1
- package/extensions/lobster/package.json +1 -1
- package/extensions/matrix/CHANGELOG.md +10 -0
- package/extensions/matrix/package.json +1 -1
- package/extensions/mattermost/package.json +1 -1
- package/extensions/memory-core/package.json +1 -1
- package/extensions/memory-lancedb/package.json +1 -1
- package/extensions/minimax-portal-auth/package.json +1 -1
- package/extensions/msteams/CHANGELOG.md +10 -0
- package/extensions/msteams/package.json +1 -1
- package/extensions/nextcloud-talk/package.json +1 -1
- package/extensions/nostr/CHANGELOG.md +10 -0
- package/extensions/nostr/package.json +1 -1
- package/extensions/open-prose/package.json +1 -1
- package/extensions/openai-codex-auth/package.json +1 -1
- package/extensions/signal/package.json +1 -1
- package/extensions/slack/package.json +1 -1
- package/extensions/telegram/package.json +1 -1
- package/extensions/tlon/package.json +1 -1
- package/extensions/twitch/CHANGELOG.md +10 -0
- package/extensions/twitch/package.json +1 -1
- package/extensions/voice-call/CHANGELOG.md +10 -0
- package/extensions/voice-call/package.json +1 -1
- package/extensions/whatsapp/package.json +1 -1
- package/extensions/zalo/CHANGELOG.md +10 -0
- package/extensions/zalo/package.json +1 -1
- package/extensions/zalouser/CHANGELOG.md +10 -0
- package/extensions/zalouser/package.json +1 -1
- package/package.json +1 -1
- package/skills/apple-reminders/SKILL.md +100 -49
- package/skills/coding-agent/SKILL.md +34 -28
- package/skills/github/SKILL.md +131 -16
- package/skills/imsg/SKILL.md +112 -15
- package/skills/openhue/SKILL.md +101 -19
- package/skills/tmux/SKILL.md +111 -79
- package/skills/weather/SKILL.md +88 -25
- package/dist/agents/openclaw-tools.js +0 -151
- package/dist/agents/tool-security.js +0 -96
- package/dist/gateway/url-validation.js +0 -94
- package/dist/infra/openclaw-root.js +0 -109
- package/dist/infra/tmp-openclaw-dir.js +0 -81
- package/dist/media/path-sanitization.js +0 -78
|
@@ -1,14 +1,22 @@
|
|
|
1
1
|
import { Type } from "@sinclair/typebox";
|
|
2
|
-
import {
|
|
2
|
+
import { fetchWithSsrFGuard } from "../../infra/net/fetch-guard.js";
|
|
3
|
+
import { SsrFBlockedError } from "../../infra/net/ssrf.js";
|
|
4
|
+
import { logDebug } from "../../logger.js";
|
|
5
|
+
import { wrapExternalContent, wrapWebContent } from "../../security/external-content.js";
|
|
6
|
+
import { normalizeSecretInput } from "../../utils/normalize-secret-input.js";
|
|
3
7
|
import { stringEnum } from "../schema/typebox.js";
|
|
4
8
|
import { jsonResult, readNumberParam, readStringParam } from "./common.js";
|
|
5
|
-
import { DEFAULT_CACHE_TTL_MINUTES, DEFAULT_TIMEOUT_SECONDS, normalizeCacheKey, readCache, readResponseText, resolveCacheTtlMs, resolveTimeoutSeconds, withTimeout, writeCache, } from "./web-shared.js";
|
|
6
9
|
import { extractReadableContent, htmlToMarkdown, markdownToText, truncateText, } from "./web-fetch-utils.js";
|
|
10
|
+
import { DEFAULT_CACHE_TTL_MINUTES, DEFAULT_TIMEOUT_SECONDS, normalizeCacheKey, readCache, readResponseText, resolveCacheTtlMs, resolveTimeoutSeconds, withTimeout, writeCache, } from "./web-shared.js";
|
|
7
11
|
export { extractReadableContent } from "./web-fetch-utils.js";
|
|
8
12
|
const EXTRACT_MODES = ["markdown", "text"];
|
|
9
13
|
const DEFAULT_FETCH_MAX_CHARS = 50_000;
|
|
14
|
+
const DEFAULT_FETCH_MAX_RESPONSE_BYTES = 2_000_000;
|
|
15
|
+
const FETCH_MAX_RESPONSE_BYTES_MIN = 32_000;
|
|
16
|
+
const FETCH_MAX_RESPONSE_BYTES_MAX = 10_000_000;
|
|
10
17
|
const DEFAULT_FETCH_MAX_REDIRECTS = 3;
|
|
11
18
|
const DEFAULT_ERROR_MAX_CHARS = 4_000;
|
|
19
|
+
const DEFAULT_ERROR_MAX_BYTES = 64_000;
|
|
12
20
|
const DEFAULT_FIRECRAWL_BASE_URL = "https://api.firecrawl.dev";
|
|
13
21
|
const DEFAULT_FIRECRAWL_MAX_AGE_MS = 172_800_000;
|
|
14
22
|
const DEFAULT_FETCH_USER_AGENT = "Mozilla/5.0 (Macintosh; Intel Mac OS X 14_7_2) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/122.0.0.0 Safari/537.36";
|
|
@@ -26,38 +34,63 @@ const WebFetchSchema = Type.Object({
|
|
|
26
34
|
});
|
|
27
35
|
function resolveFetchConfig(cfg) {
|
|
28
36
|
const fetch = cfg?.tools?.web?.fetch;
|
|
29
|
-
if (!fetch || typeof fetch !== "object")
|
|
37
|
+
if (!fetch || typeof fetch !== "object") {
|
|
30
38
|
return undefined;
|
|
39
|
+
}
|
|
31
40
|
return fetch;
|
|
32
41
|
}
|
|
33
42
|
function resolveFetchEnabled(params) {
|
|
34
|
-
if (typeof params.fetch?.enabled === "boolean")
|
|
43
|
+
if (typeof params.fetch?.enabled === "boolean") {
|
|
35
44
|
return params.fetch.enabled;
|
|
45
|
+
}
|
|
36
46
|
return true;
|
|
37
47
|
}
|
|
38
48
|
function resolveFetchReadabilityEnabled(fetch) {
|
|
39
|
-
if (typeof fetch?.readability === "boolean")
|
|
49
|
+
if (typeof fetch?.readability === "boolean") {
|
|
40
50
|
return fetch.readability;
|
|
51
|
+
}
|
|
41
52
|
return true;
|
|
42
53
|
}
|
|
54
|
+
function resolveFetchMaxCharsCap(fetch) {
|
|
55
|
+
const raw = fetch && "maxCharsCap" in fetch && typeof fetch.maxCharsCap === "number"
|
|
56
|
+
? fetch.maxCharsCap
|
|
57
|
+
: undefined;
|
|
58
|
+
if (typeof raw !== "number" || !Number.isFinite(raw)) {
|
|
59
|
+
return DEFAULT_FETCH_MAX_CHARS;
|
|
60
|
+
}
|
|
61
|
+
return Math.max(100, Math.floor(raw));
|
|
62
|
+
}
|
|
63
|
+
function resolveFetchMaxResponseBytes(fetch) {
|
|
64
|
+
const raw = fetch && "maxResponseBytes" in fetch && typeof fetch.maxResponseBytes === "number"
|
|
65
|
+
? fetch.maxResponseBytes
|
|
66
|
+
: undefined;
|
|
67
|
+
if (typeof raw !== "number" || !Number.isFinite(raw) || raw <= 0) {
|
|
68
|
+
return DEFAULT_FETCH_MAX_RESPONSE_BYTES;
|
|
69
|
+
}
|
|
70
|
+
const value = Math.floor(raw);
|
|
71
|
+
return Math.min(FETCH_MAX_RESPONSE_BYTES_MAX, Math.max(FETCH_MAX_RESPONSE_BYTES_MIN, value));
|
|
72
|
+
}
|
|
43
73
|
function resolveFirecrawlConfig(fetch) {
|
|
44
|
-
if (!fetch || typeof fetch !== "object")
|
|
74
|
+
if (!fetch || typeof fetch !== "object") {
|
|
45
75
|
return undefined;
|
|
76
|
+
}
|
|
46
77
|
const firecrawl = "firecrawl" in fetch ? fetch.firecrawl : undefined;
|
|
47
|
-
if (!firecrawl || typeof firecrawl !== "object")
|
|
78
|
+
if (!firecrawl || typeof firecrawl !== "object") {
|
|
48
79
|
return undefined;
|
|
80
|
+
}
|
|
49
81
|
return firecrawl;
|
|
50
82
|
}
|
|
51
83
|
function resolveFirecrawlApiKey(firecrawl) {
|
|
52
84
|
const fromConfig = firecrawl && "apiKey" in firecrawl && typeof firecrawl.apiKey === "string"
|
|
53
|
-
? firecrawl.apiKey
|
|
85
|
+
? normalizeSecretInput(firecrawl.apiKey)
|
|
54
86
|
: "";
|
|
55
|
-
const fromEnv = (process.env.FIRECRAWL_API_KEY
|
|
87
|
+
const fromEnv = normalizeSecretInput(process.env.FIRECRAWL_API_KEY);
|
|
56
88
|
return fromConfig || fromEnv || undefined;
|
|
57
89
|
}
|
|
58
90
|
function resolveFirecrawlEnabled(params) {
|
|
59
|
-
if (typeof params.firecrawl?.enabled === "boolean")
|
|
91
|
+
if (typeof params.firecrawl?.enabled === "boolean") {
|
|
60
92
|
return params.firecrawl.enabled;
|
|
93
|
+
}
|
|
61
94
|
return Boolean(params.apiKey);
|
|
62
95
|
}
|
|
63
96
|
function resolveFirecrawlBaseUrl(firecrawl) {
|
|
@@ -67,28 +100,32 @@ function resolveFirecrawlBaseUrl(firecrawl) {
|
|
|
67
100
|
return raw || DEFAULT_FIRECRAWL_BASE_URL;
|
|
68
101
|
}
|
|
69
102
|
function resolveFirecrawlOnlyMainContent(firecrawl) {
|
|
70
|
-
if (typeof firecrawl?.onlyMainContent === "boolean")
|
|
103
|
+
if (typeof firecrawl?.onlyMainContent === "boolean") {
|
|
71
104
|
return firecrawl.onlyMainContent;
|
|
105
|
+
}
|
|
72
106
|
return true;
|
|
73
107
|
}
|
|
74
108
|
function resolveFirecrawlMaxAgeMs(firecrawl) {
|
|
75
109
|
const raw = firecrawl && "maxAgeMs" in firecrawl && typeof firecrawl.maxAgeMs === "number"
|
|
76
110
|
? firecrawl.maxAgeMs
|
|
77
111
|
: undefined;
|
|
78
|
-
if (typeof raw !== "number" || !Number.isFinite(raw))
|
|
112
|
+
if (typeof raw !== "number" || !Number.isFinite(raw)) {
|
|
79
113
|
return undefined;
|
|
114
|
+
}
|
|
80
115
|
const parsed = Math.max(0, Math.floor(raw));
|
|
81
116
|
return parsed > 0 ? parsed : undefined;
|
|
82
117
|
}
|
|
83
118
|
function resolveFirecrawlMaxAgeMsOrDefault(firecrawl) {
|
|
84
119
|
const resolved = resolveFirecrawlMaxAgeMs(firecrawl);
|
|
85
|
-
if (typeof resolved === "number")
|
|
120
|
+
if (typeof resolved === "number") {
|
|
86
121
|
return resolved;
|
|
122
|
+
}
|
|
87
123
|
return DEFAULT_FIRECRAWL_MAX_AGE_MS;
|
|
88
124
|
}
|
|
89
|
-
function resolveMaxChars(value, fallback) {
|
|
125
|
+
function resolveMaxChars(value, fallback, cap) {
|
|
90
126
|
const parsed = typeof value === "number" && Number.isFinite(value) ? value : fallback;
|
|
91
|
-
|
|
127
|
+
const clamped = Math.max(100, Math.floor(parsed));
|
|
128
|
+
return Math.min(clamped, cap);
|
|
92
129
|
}
|
|
93
130
|
function resolveMaxRedirects(value, fallback) {
|
|
94
131
|
const parsed = typeof value === "number" && Number.isFinite(value) ? value : fallback;
|
|
@@ -96,79 +133,17 @@ function resolveMaxRedirects(value, fallback) {
|
|
|
96
133
|
}
|
|
97
134
|
function looksLikeHtml(value) {
|
|
98
135
|
const trimmed = value.trimStart();
|
|
99
|
-
if (!trimmed)
|
|
136
|
+
if (!trimmed) {
|
|
100
137
|
return false;
|
|
138
|
+
}
|
|
101
139
|
const head = trimmed.slice(0, 256).toLowerCase();
|
|
102
140
|
return head.startsWith("<!doctype html") || head.startsWith("<html");
|
|
103
141
|
}
|
|
104
|
-
function isRedirectStatus(status) {
|
|
105
|
-
return status === 301 || status === 302 || status === 303 || status === 307 || status === 308;
|
|
106
|
-
}
|
|
107
|
-
async function fetchWithRedirects(params) {
|
|
108
|
-
const signal = withTimeout(undefined, params.timeoutSeconds * 1000);
|
|
109
|
-
const visited = new Set();
|
|
110
|
-
let currentUrl = params.url;
|
|
111
|
-
let redirectCount = 0;
|
|
112
|
-
while (true) {
|
|
113
|
-
let parsedUrl;
|
|
114
|
-
try {
|
|
115
|
-
parsedUrl = new URL(currentUrl);
|
|
116
|
-
}
|
|
117
|
-
catch {
|
|
118
|
-
throw new Error("Invalid URL: must be http or https");
|
|
119
|
-
}
|
|
120
|
-
if (!["http:", "https:"].includes(parsedUrl.protocol)) {
|
|
121
|
-
throw new Error("Invalid URL: must be http or https");
|
|
122
|
-
}
|
|
123
|
-
const pinned = await resolvePinnedHostname(parsedUrl.hostname);
|
|
124
|
-
const dispatcher = createPinnedDispatcher(pinned);
|
|
125
|
-
let res;
|
|
126
|
-
try {
|
|
127
|
-
res = await fetch(parsedUrl.toString(), {
|
|
128
|
-
method: "GET",
|
|
129
|
-
headers: {
|
|
130
|
-
Accept: "*/*",
|
|
131
|
-
"User-Agent": params.userAgent,
|
|
132
|
-
"Accept-Language": "en-US,en;q=0.9",
|
|
133
|
-
},
|
|
134
|
-
signal,
|
|
135
|
-
redirect: "manual",
|
|
136
|
-
dispatcher,
|
|
137
|
-
});
|
|
138
|
-
}
|
|
139
|
-
catch (err) {
|
|
140
|
-
await closeDispatcher(dispatcher);
|
|
141
|
-
throw err;
|
|
142
|
-
}
|
|
143
|
-
if (isRedirectStatus(res.status)) {
|
|
144
|
-
const location = res.headers.get("location");
|
|
145
|
-
if (!location) {
|
|
146
|
-
await closeDispatcher(dispatcher);
|
|
147
|
-
throw new Error(`Redirect missing location header (${res.status})`);
|
|
148
|
-
}
|
|
149
|
-
redirectCount += 1;
|
|
150
|
-
if (redirectCount > params.maxRedirects) {
|
|
151
|
-
await closeDispatcher(dispatcher);
|
|
152
|
-
throw new Error(`Too many redirects (limit: ${params.maxRedirects})`);
|
|
153
|
-
}
|
|
154
|
-
const nextUrl = new URL(location, parsedUrl).toString();
|
|
155
|
-
if (visited.has(nextUrl)) {
|
|
156
|
-
await closeDispatcher(dispatcher);
|
|
157
|
-
throw new Error("Redirect loop detected");
|
|
158
|
-
}
|
|
159
|
-
visited.add(nextUrl);
|
|
160
|
-
void res.body?.cancel();
|
|
161
|
-
await closeDispatcher(dispatcher);
|
|
162
|
-
currentUrl = nextUrl;
|
|
163
|
-
continue;
|
|
164
|
-
}
|
|
165
|
-
return { response: res, finalUrl: currentUrl, dispatcher };
|
|
166
|
-
}
|
|
167
|
-
}
|
|
168
142
|
function formatWebFetchErrorDetail(params) {
|
|
169
143
|
const { detail, contentType, maxChars } = params;
|
|
170
|
-
if (!detail)
|
|
144
|
+
if (!detail) {
|
|
171
145
|
return "";
|
|
146
|
+
}
|
|
172
147
|
let text = detail;
|
|
173
148
|
const contentTypeLower = contentType?.toLowerCase();
|
|
174
149
|
if (contentTypeLower?.includes("text/html") || looksLikeHtml(detail)) {
|
|
@@ -179,6 +154,102 @@ function formatWebFetchErrorDetail(params) {
|
|
|
179
154
|
const truncated = truncateText(text.trim(), maxChars);
|
|
180
155
|
return truncated.text;
|
|
181
156
|
}
|
|
157
|
+
function redactUrlForDebugLog(rawUrl) {
|
|
158
|
+
try {
|
|
159
|
+
const parsed = new URL(rawUrl);
|
|
160
|
+
return parsed.pathname && parsed.pathname !== "/" ? `${parsed.origin}/...` : parsed.origin;
|
|
161
|
+
}
|
|
162
|
+
catch {
|
|
163
|
+
return "[invalid-url]";
|
|
164
|
+
}
|
|
165
|
+
}
|
|
166
|
+
const WEB_FETCH_WRAPPER_WITH_WARNING_OVERHEAD = wrapWebContent("", "web_fetch").length;
|
|
167
|
+
const WEB_FETCH_WRAPPER_NO_WARNING_OVERHEAD = wrapExternalContent("", {
|
|
168
|
+
source: "web_fetch",
|
|
169
|
+
includeWarning: false,
|
|
170
|
+
}).length;
|
|
171
|
+
/**
 * Wraps fetched text in the external-content wrapper while trying to keep the
 * wrapped result within `maxChars` (wrapper characters count against the budget).
 *
 * @param {string} value - Raw text to wrap.
 * @param {number} maxChars - Character budget for the wrapped output.
 * @returns {{ text: string, truncated: boolean, rawLength: number, wrappedLength: number }}
 *   `text` is the wrapped output, `rawLength` the length of the (possibly
 *   truncated) inner text, `wrappedLength` the length of `text`.
 */
function wrapWebFetchContent(value, maxChars) {
    if (maxChars <= 0) {
        return { text: "", truncated: true, rawLength: 0, wrappedLength: 0 };
    }
    // Use the with-warning wrapper only when the budget can hold that wrapper
    // on its own; otherwise fall back to the wrapper without the warning.
    const includeWarning = maxChars >= WEB_FETCH_WRAPPER_WITH_WARNING_OVERHEAD;
    const wrap = (text) => includeWarning
        ? wrapWebContent(text, "web_fetch")
        : wrapExternalContent(text, { source: "web_fetch", includeWarning: false });
    const wrapperOverhead = includeWarning
        ? WEB_FETCH_WRAPPER_WITH_WARNING_OVERHEAD
        : WEB_FETCH_WRAPPER_NO_WARNING_OVERHEAD;
    if (wrapperOverhead > maxChars) {
        // Only reachable without the warning: with it, the overhead is <= maxChars
        // by construction. No room for content, so emit a truncated empty wrapper.
        const truncatedWrapper = truncateText(wrap(""), maxChars);
        return {
            text: truncatedWrapper.text,
            truncated: true,
            rawLength: 0,
            wrappedLength: truncatedWrapper.text.length,
        };
    }
    const maxInner = Math.max(0, maxChars - wrapperOverhead);
    let truncated = truncateText(value, maxInner);
    let wrappedText = wrap(truncated.text);
    if (wrappedText.length > maxChars) {
        // The wrapped text can come out larger than the empty-wrapper estimate
        // (presumably the wrapper transforms its content — confirm in the wrapper
        // helpers). Shrink the inner budget by the overshoot and wrap once more.
        const excess = wrappedText.length - maxChars;
        const adjustedMaxInner = Math.max(0, maxInner - excess);
        truncated = truncateText(value, adjustedMaxInner);
        wrappedText = wrap(truncated.text);
    }
    return {
        text: wrappedText,
        truncated: truncated.truncated,
        rawLength: truncated.text.length,
        wrappedLength: wrappedText.length,
    };
}
|
|
211
|
+
/**
 * Wraps a single metadata field (e.g. a title or warning) as external
 * "web_fetch" content without the warning.
 *
 * @param {string | undefined | null} value - Field text.
 * @returns {string | undefined | null} The wrapped text, or `value` itself
 *   unchanged when it is falsy (undefined, null, empty string).
 */
function wrapWebFetchField(value) {
    return value
        ? wrapExternalContent(value, { source: "web_fetch", includeWarning: false })
        : value;
}
|
|
217
|
+
/**
 * Assembles the web_fetch result payload from a Firecrawl response.
 *
 * Text, title and warning are wrapped as untrusted external content; URLs,
 * status and content type are passed through unwrapped.
 *
 * @param {object} params
 * @param {object} params.firecrawl - Firecrawl result; `text`, `title`,
 *   `finalUrl`, `status` and `warning` are read.
 * @param {string} params.rawUrl - URL as originally requested.
 * @param {string} params.finalUrlFallback - Used when Firecrawl reports no final URL.
 * @param {number} params.statusFallback - Used when Firecrawl reports no status.
 * @param {string} params.extractMode - Echoed into the payload.
 * @param {number} params.maxChars - Budget passed to wrapWebFetchContent.
 * @param {number} params.tookMs - Elapsed time, echoed into the payload.
 * @returns {object} The payload object.
 */
function buildFirecrawlWebFetchPayload(params) {
    const { firecrawl } = params;
    const content = wrapWebFetchContent(firecrawl.text, params.maxChars);
    const title = firecrawl.title ? wrapWebFetchField(firecrawl.title) : undefined;
    const externalContent = {
        untrusted: true,
        source: "web_fetch",
        wrapped: true,
    };
    return {
        // URLs stay raw so they can be fed into follow-up tool calls.
        url: params.rawUrl,
        finalUrl: firecrawl.finalUrl || params.finalUrlFallback,
        status: firecrawl.status ?? params.statusFallback,
        // Protocol metadata is not wrapped.
        contentType: "text/markdown",
        title,
        extractMode: params.extractMode,
        extractor: "firecrawl",
        externalContent,
        truncated: content.truncated,
        length: content.wrappedLength,
        // Length of the inner content, excluding the wrapper.
        rawLength: content.rawLength,
        wrappedLength: content.wrappedLength,
        fetchedAt: new Date().toISOString(),
        tookMs: params.tookMs,
        text: content.text,
        warning: wrapWebFetchField(firecrawl.warning),
    };
}
|
|
245
|
+
/**
 * Reduces a Content-Type header value to its part before the first ";",
 * trimmed of surrounding whitespace.
 *
 * @param {string | null | undefined} value - Raw header value.
 * @returns {string | undefined} The trimmed leading part, or `undefined` when
 *   the input is falsy or that part is empty.
 */
function normalizeContentType(value) {
    if (!value) {
        return undefined;
    }
    const semicolonAt = value.indexOf(";");
    const leadingPart = semicolonAt === -1 ? value : value.slice(0, semicolonAt);
    const mediaType = leadingPart.trim();
    if (mediaType === "") {
        return undefined;
    }
    return mediaType;
}
|
|
182
253
|
export async function fetchFirecrawlContent(params) {
|
|
183
254
|
const endpoint = resolveFirecrawlEndpoint(params.baseUrl);
|
|
184
255
|
const body = {
|
|
@@ -201,8 +272,8 @@ export async function fetchFirecrawlContent(params) {
|
|
|
201
272
|
});
|
|
202
273
|
const payload = (await res.json());
|
|
203
274
|
if (!res.ok || payload?.success === false) {
|
|
204
|
-
const detail = payload?.error
|
|
205
|
-
throw new Error(`Firecrawl fetch failed (${res.status}): ${detail}`.trim());
|
|
275
|
+
const detail = payload?.error ?? "";
|
|
276
|
+
throw new Error(`Firecrawl fetch failed (${res.status}): ${wrapWebContent(detail || res.statusText, "web_fetch")}`.trim());
|
|
206
277
|
}
|
|
207
278
|
const data = payload?.data ?? {};
|
|
208
279
|
const rawText = typeof data.markdown === "string"
|
|
@@ -219,11 +290,50 @@ export async function fetchFirecrawlContent(params) {
|
|
|
219
290
|
warning: payload?.warning,
|
|
220
291
|
};
|
|
221
292
|
}
|
|
293
|
+
/**
 * Maps web-fetch parameters onto the parameter object passed to
 * fetchFirecrawlContent.
 *
 * @param {object} params - Web-fetch parameters carrying the `firecrawl*` settings.
 * @returns {object | null} The Firecrawl parameters, or `null` when
 *   `firecrawlEnabled` or `firecrawlApiKey` is falsy.
 */
function toFirecrawlContentParams(params) {
    const canUseFirecrawl = Boolean(params.firecrawlEnabled && params.firecrawlApiKey);
    if (!canUseFirecrawl) {
        return null;
    }
    const {
        url,
        extractMode,
        firecrawlApiKey: apiKey,
        firecrawlBaseUrl: baseUrl,
        firecrawlOnlyMainContent: onlyMainContent,
        firecrawlMaxAgeMs: maxAgeMs,
        firecrawlProxy: proxy,
        firecrawlStoreInCache: storeInCache,
        firecrawlTimeoutSeconds: timeoutSeconds,
    } = params;
    return {
        url,
        extractMode,
        apiKey,
        baseUrl,
        onlyMainContent,
        maxAgeMs,
        proxy,
        storeInCache,
        timeoutSeconds,
    };
}
|
|
309
|
+
/**
 * Fetches `params.urlToFetch` through Firecrawl when toFirecrawlContentParams
 * yields parameters, builds the web_fetch payload, and stores it in
 * FETCH_CACHE under `params.cacheKey`.
 *
 * @param {object} params - Web-fetch parameters plus `urlToFetch`,
 *   `finalUrlFallback`, `statusFallback`, `cacheKey` and `tookMs`.
 * @returns {Promise<object | null>} The cached payload, or `null` when no
 *   Firecrawl parameters could be derived. Errors thrown by
 *   fetchFirecrawlContent are not caught here.
 */
async function maybeFetchFirecrawlWebFetchPayload(params) {
    const { urlToFetch, extractMode, cacheKey, cacheTtlMs } = params;
    // The Firecrawl request targets urlToFetch; the payload below still
    // reports the originally requested params.url.
    const firecrawlParams = toFirecrawlContentParams({ ...params, url: urlToFetch, extractMode });
    if (!firecrawlParams) {
        return null;
    }
    const firecrawl = await fetchFirecrawlContent(firecrawlParams);
    const payload = buildFirecrawlWebFetchPayload({
        firecrawl,
        rawUrl: params.url,
        finalUrlFallback: params.finalUrlFallback,
        statusFallback: params.statusFallback,
        extractMode,
        maxChars: params.maxChars,
        tookMs: params.tookMs,
    });
    writeCache(FETCH_CACHE, cacheKey, payload, cacheTtlMs);
    return payload;
}
|
|
222
331
|
async function runWebFetch(params) {
|
|
223
332
|
const cacheKey = normalizeCacheKey(`fetch:${params.url}:${params.extractMode}:${params.maxChars}`);
|
|
224
333
|
const cached = readCache(FETCH_CACHE, cacheKey);
|
|
225
|
-
if (cached)
|
|
334
|
+
if (cached) {
|
|
226
335
|
return { ...cached.value, cached: true };
|
|
336
|
+
}
|
|
227
337
|
let parsedUrl;
|
|
228
338
|
try {
|
|
229
339
|
parsedUrl = new URL(params.url);
|
|
@@ -236,103 +346,88 @@ async function runWebFetch(params) {
|
|
|
236
346
|
}
|
|
237
347
|
const start = Date.now();
|
|
238
348
|
let res;
|
|
239
|
-
let
|
|
349
|
+
let release = null;
|
|
240
350
|
let finalUrl = params.url;
|
|
241
351
|
try {
|
|
242
|
-
const result = await
|
|
352
|
+
const result = await fetchWithSsrFGuard({
|
|
243
353
|
url: params.url,
|
|
244
354
|
maxRedirects: params.maxRedirects,
|
|
245
|
-
|
|
246
|
-
|
|
355
|
+
timeoutMs: params.timeoutSeconds * 1000,
|
|
356
|
+
init: {
|
|
357
|
+
headers: {
|
|
358
|
+
Accept: "text/markdown, text/html;q=0.9, */*;q=0.1",
|
|
359
|
+
"User-Agent": params.userAgent,
|
|
360
|
+
"Accept-Language": "en-US,en;q=0.9",
|
|
361
|
+
},
|
|
362
|
+
},
|
|
247
363
|
});
|
|
248
364
|
res = result.response;
|
|
249
365
|
finalUrl = result.finalUrl;
|
|
250
|
-
|
|
366
|
+
release = result.release;
|
|
367
|
+
// Cloudflare Markdown for Agents — log token budget hint when present
|
|
368
|
+
const markdownTokens = res.headers.get("x-markdown-tokens");
|
|
369
|
+
if (markdownTokens) {
|
|
370
|
+
logDebug(`[web-fetch] x-markdown-tokens: ${markdownTokens} (${redactUrlForDebugLog(finalUrl)})`);
|
|
371
|
+
}
|
|
251
372
|
}
|
|
252
373
|
catch (error) {
|
|
253
374
|
if (error instanceof SsrFBlockedError) {
|
|
254
375
|
throw error;
|
|
255
376
|
}
|
|
256
|
-
|
|
257
|
-
|
|
258
|
-
|
|
259
|
-
|
|
260
|
-
|
|
261
|
-
|
|
262
|
-
|
|
263
|
-
|
|
264
|
-
|
|
265
|
-
storeInCache: params.firecrawlStoreInCache,
|
|
266
|
-
timeoutSeconds: params.firecrawlTimeoutSeconds,
|
|
267
|
-
});
|
|
268
|
-
const truncated = truncateText(firecrawl.text, params.maxChars);
|
|
269
|
-
const payload = {
|
|
270
|
-
url: params.url,
|
|
271
|
-
finalUrl: firecrawl.finalUrl || finalUrl,
|
|
272
|
-
status: firecrawl.status ?? 200,
|
|
273
|
-
contentType: "text/markdown",
|
|
274
|
-
title: firecrawl.title,
|
|
275
|
-
extractMode: params.extractMode,
|
|
276
|
-
extractor: "firecrawl",
|
|
277
|
-
truncated: truncated.truncated,
|
|
278
|
-
length: truncated.text.length,
|
|
279
|
-
fetchedAt: new Date().toISOString(),
|
|
280
|
-
tookMs: Date.now() - start,
|
|
281
|
-
text: truncated.text,
|
|
282
|
-
warning: firecrawl.warning,
|
|
283
|
-
};
|
|
284
|
-
writeCache(FETCH_CACHE, cacheKey, payload, params.cacheTtlMs);
|
|
377
|
+
const payload = await maybeFetchFirecrawlWebFetchPayload({
|
|
378
|
+
...params,
|
|
379
|
+
urlToFetch: finalUrl,
|
|
380
|
+
finalUrlFallback: finalUrl,
|
|
381
|
+
statusFallback: 200,
|
|
382
|
+
cacheKey,
|
|
383
|
+
tookMs: Date.now() - start,
|
|
384
|
+
});
|
|
385
|
+
if (payload) {
|
|
285
386
|
return payload;
|
|
286
387
|
}
|
|
287
388
|
throw error;
|
|
288
389
|
}
|
|
289
390
|
try {
|
|
290
391
|
if (!res.ok) {
|
|
291
|
-
|
|
292
|
-
|
|
293
|
-
|
|
294
|
-
|
|
295
|
-
|
|
296
|
-
|
|
297
|
-
|
|
298
|
-
|
|
299
|
-
|
|
300
|
-
storeInCache: params.firecrawlStoreInCache,
|
|
301
|
-
timeoutSeconds: params.firecrawlTimeoutSeconds,
|
|
302
|
-
});
|
|
303
|
-
const truncated = truncateText(firecrawl.text, params.maxChars);
|
|
304
|
-
const payload = {
|
|
305
|
-
url: params.url,
|
|
306
|
-
finalUrl: firecrawl.finalUrl || finalUrl,
|
|
307
|
-
status: firecrawl.status ?? res.status,
|
|
308
|
-
contentType: "text/markdown",
|
|
309
|
-
title: firecrawl.title,
|
|
310
|
-
extractMode: params.extractMode,
|
|
311
|
-
extractor: "firecrawl",
|
|
312
|
-
truncated: truncated.truncated,
|
|
313
|
-
length: truncated.text.length,
|
|
314
|
-
fetchedAt: new Date().toISOString(),
|
|
315
|
-
tookMs: Date.now() - start,
|
|
316
|
-
text: truncated.text,
|
|
317
|
-
warning: firecrawl.warning,
|
|
318
|
-
};
|
|
319
|
-
writeCache(FETCH_CACHE, cacheKey, payload, params.cacheTtlMs);
|
|
392
|
+
const payload = await maybeFetchFirecrawlWebFetchPayload({
|
|
393
|
+
...params,
|
|
394
|
+
urlToFetch: params.url,
|
|
395
|
+
finalUrlFallback: finalUrl,
|
|
396
|
+
statusFallback: res.status,
|
|
397
|
+
cacheKey,
|
|
398
|
+
tookMs: Date.now() - start,
|
|
399
|
+
});
|
|
400
|
+
if (payload) {
|
|
320
401
|
return payload;
|
|
321
402
|
}
|
|
322
|
-
const
|
|
403
|
+
const rawDetailResult = await readResponseText(res, { maxBytes: DEFAULT_ERROR_MAX_BYTES });
|
|
404
|
+
const rawDetail = rawDetailResult.text;
|
|
323
405
|
const detail = formatWebFetchErrorDetail({
|
|
324
406
|
detail: rawDetail,
|
|
325
407
|
contentType: res.headers.get("content-type"),
|
|
326
408
|
maxChars: DEFAULT_ERROR_MAX_CHARS,
|
|
327
409
|
});
|
|
328
|
-
|
|
410
|
+
const wrappedDetail = wrapWebFetchContent(detail || res.statusText, DEFAULT_ERROR_MAX_CHARS);
|
|
411
|
+
throw new Error(`Web fetch failed (${res.status}): ${wrappedDetail.text}`);
|
|
329
412
|
}
|
|
330
413
|
const contentType = res.headers.get("content-type") ?? "application/octet-stream";
|
|
331
|
-
const
|
|
414
|
+
const normalizedContentType = normalizeContentType(contentType) ?? "application/octet-stream";
|
|
415
|
+
const bodyResult = await readResponseText(res, { maxBytes: params.maxResponseBytes });
|
|
416
|
+
const body = bodyResult.text;
|
|
417
|
+
const responseTruncatedWarning = bodyResult.truncated
|
|
418
|
+
? `Response body truncated after ${params.maxResponseBytes} bytes.`
|
|
419
|
+
: undefined;
|
|
332
420
|
let title;
|
|
333
421
|
let extractor = "raw";
|
|
334
422
|
let text = body;
|
|
335
|
-
if (contentType.includes("text/
|
|
423
|
+
if (contentType.includes("text/markdown")) {
|
|
424
|
+
// Cloudflare Markdown for Agents: server returned pre-rendered markdown
|
|
425
|
+
extractor = "cf-markdown";
|
|
426
|
+
if (params.extractMode === "text") {
|
|
427
|
+
text = markdownToText(body);
|
|
428
|
+
}
|
|
429
|
+
}
|
|
430
|
+
else if (contentType.includes("text/html")) {
|
|
336
431
|
if (params.readabilityEnabled) {
|
|
337
432
|
const readable = await extractReadableContent({
|
|
338
433
|
html: body,
|
|
@@ -370,43 +465,47 @@ async function runWebFetch(params) {
|
|
|
370
465
|
extractor = "raw";
|
|
371
466
|
}
|
|
372
467
|
}
|
|
373
|
-
const
|
|
468
|
+
const wrapped = wrapWebFetchContent(text, params.maxChars);
|
|
469
|
+
const wrappedTitle = title ? wrapWebFetchField(title) : undefined;
|
|
470
|
+
const wrappedWarning = wrapWebFetchField(responseTruncatedWarning);
|
|
374
471
|
const payload = {
|
|
375
|
-
url: params.url,
|
|
376
|
-
finalUrl,
|
|
472
|
+
url: params.url, // Keep raw for tool chaining
|
|
473
|
+
finalUrl, // Keep raw
|
|
377
474
|
status: res.status,
|
|
378
|
-
contentType,
|
|
379
|
-
title,
|
|
475
|
+
contentType: normalizedContentType, // Protocol metadata, don't wrap
|
|
476
|
+
title: wrappedTitle,
|
|
380
477
|
extractMode: params.extractMode,
|
|
381
478
|
extractor,
|
|
382
|
-
|
|
383
|
-
|
|
479
|
+
externalContent: {
|
|
480
|
+
untrusted: true,
|
|
481
|
+
source: "web_fetch",
|
|
482
|
+
wrapped: true,
|
|
483
|
+
},
|
|
484
|
+
truncated: wrapped.truncated,
|
|
485
|
+
length: wrapped.wrappedLength,
|
|
486
|
+
rawLength: wrapped.rawLength, // Actual content length, not wrapped
|
|
487
|
+
wrappedLength: wrapped.wrappedLength,
|
|
384
488
|
fetchedAt: new Date().toISOString(),
|
|
385
489
|
tookMs: Date.now() - start,
|
|
386
|
-
text:
|
|
490
|
+
text: wrapped.text,
|
|
491
|
+
warning: wrappedWarning,
|
|
387
492
|
};
|
|
388
493
|
writeCache(FETCH_CACHE, cacheKey, payload, params.cacheTtlMs);
|
|
389
494
|
return payload;
|
|
390
495
|
}
|
|
391
496
|
finally {
|
|
392
|
-
|
|
497
|
+
if (release) {
|
|
498
|
+
await release();
|
|
499
|
+
}
|
|
393
500
|
}
|
|
394
501
|
}
|
|
395
502
|
async function tryFirecrawlFallback(params) {
|
|
396
|
-
|
|
503
|
+
const firecrawlParams = toFirecrawlContentParams(params);
|
|
504
|
+
if (!firecrawlParams) {
|
|
397
505
|
return null;
|
|
506
|
+
}
|
|
398
507
|
try {
|
|
399
|
-
const firecrawl = await fetchFirecrawlContent(
|
|
400
|
-
url: params.url,
|
|
401
|
-
extractMode: params.extractMode,
|
|
402
|
-
apiKey: params.firecrawlApiKey,
|
|
403
|
-
baseUrl: params.firecrawlBaseUrl,
|
|
404
|
-
onlyMainContent: params.firecrawlOnlyMainContent,
|
|
405
|
-
maxAgeMs: params.firecrawlMaxAgeMs,
|
|
406
|
-
proxy: params.firecrawlProxy,
|
|
407
|
-
storeInCache: params.firecrawlStoreInCache,
|
|
408
|
-
timeoutSeconds: params.firecrawlTimeoutSeconds,
|
|
409
|
-
});
|
|
508
|
+
const firecrawl = await fetchFirecrawlContent(firecrawlParams);
|
|
410
509
|
return { text: firecrawl.text, title: firecrawl.title };
|
|
411
510
|
}
|
|
412
511
|
catch {
|
|
@@ -415,8 +514,9 @@ async function tryFirecrawlFallback(params) {
|
|
|
415
514
|
}
|
|
416
515
|
function resolveFirecrawlEndpoint(baseUrl) {
|
|
417
516
|
const trimmed = baseUrl.trim();
|
|
418
|
-
if (!trimmed)
|
|
517
|
+
if (!trimmed) {
|
|
419
518
|
return `${DEFAULT_FIRECRAWL_BASE_URL}/v2/scrape`;
|
|
519
|
+
}
|
|
420
520
|
try {
|
|
421
521
|
const url = new URL(trimmed);
|
|
422
522
|
if (url.pathname && url.pathname !== "/") {
|
|
@@ -431,8 +531,9 @@ function resolveFirecrawlEndpoint(baseUrl) {
|
|
|
431
531
|
}
|
|
432
532
|
export function createWebFetchTool(options) {
|
|
433
533
|
const fetch = resolveFetchConfig(options?.config);
|
|
434
|
-
if (!resolveFetchEnabled({ fetch, sandboxed: options?.sandboxed }))
|
|
534
|
+
if (!resolveFetchEnabled({ fetch, sandboxed: options?.sandboxed })) {
|
|
435
535
|
return null;
|
|
536
|
+
}
|
|
436
537
|
const readabilityEnabled = resolveFetchReadabilityEnabled(fetch);
|
|
437
538
|
const firecrawl = resolveFirecrawlConfig(fetch);
|
|
438
539
|
const firecrawlApiKey = resolveFirecrawlApiKey(firecrawl);
|
|
@@ -443,6 +544,7 @@ export function createWebFetchTool(options) {
|
|
|
443
544
|
const firecrawlTimeoutSeconds = resolveTimeoutSeconds(firecrawl?.timeoutSeconds ?? fetch?.timeoutSeconds, DEFAULT_TIMEOUT_SECONDS);
|
|
444
545
|
const userAgent = (fetch && "userAgent" in fetch && typeof fetch.userAgent === "string" && fetch.userAgent) ||
|
|
445
546
|
DEFAULT_FETCH_USER_AGENT;
|
|
547
|
+
const maxResponseBytes = resolveFetchMaxResponseBytes(fetch);
|
|
446
548
|
return {
|
|
447
549
|
label: "Web Fetch",
|
|
448
550
|
name: "web_fetch",
|
|
@@ -453,10 +555,12 @@ export function createWebFetchTool(options) {
|
|
|
453
555
|
const url = readStringParam(params, "url", { required: true });
|
|
454
556
|
const extractMode = readStringParam(params, "extractMode") === "text" ? "text" : "markdown";
|
|
455
557
|
const maxChars = readNumberParam(params, "maxChars", { integer: true });
|
|
558
|
+
const maxCharsCap = resolveFetchMaxCharsCap(fetch);
|
|
456
559
|
const result = await runWebFetch({
|
|
457
560
|
url,
|
|
458
561
|
extractMode,
|
|
459
|
-
maxChars: resolveMaxChars(maxChars ?? fetch?.maxChars, DEFAULT_FETCH_MAX_CHARS),
|
|
562
|
+
maxChars: resolveMaxChars(maxChars ?? fetch?.maxChars, DEFAULT_FETCH_MAX_CHARS, maxCharsCap),
|
|
563
|
+
maxResponseBytes,
|
|
460
564
|
maxRedirects: resolveMaxRedirects(fetch?.maxRedirects, DEFAULT_FETCH_MAX_REDIRECTS),
|
|
461
565
|
timeoutSeconds: resolveTimeoutSeconds(fetch?.timeoutSeconds, DEFAULT_TIMEOUT_SECONDS),
|
|
462
566
|
cacheTtlMs: resolveCacheTtlMs(fetch?.cacheTtlMinutes, DEFAULT_CACHE_TTL_MINUTES),
|