agentic-pi 0.2.1 → 0.2.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (43) hide show
  1. package/README.md +83 -27
  2. package/dist/args.d.ts +16 -0
  3. package/dist/args.js +29 -0
  4. package/dist/args.js.map +1 -1
  5. package/dist/extensions/web-search/extract.d.ts +18 -0
  6. package/dist/extensions/web-search/extract.js +110 -0
  7. package/dist/extensions/web-search/extract.js.map +1 -0
  8. package/dist/extensions/web-search/index.d.ts +43 -0
  9. package/dist/extensions/web-search/index.js +86 -0
  10. package/dist/extensions/web-search/index.js.map +1 -0
  11. package/dist/extensions/web-search/providers/brave.d.ts +21 -0
  12. package/dist/extensions/web-search/providers/brave.js +73 -0
  13. package/dist/extensions/web-search/providers/brave.js.map +1 -0
  14. package/dist/extensions/web-search/providers/exa.d.ts +16 -0
  15. package/dist/extensions/web-search/providers/exa.js +85 -0
  16. package/dist/extensions/web-search/providers/exa.js.map +1 -0
  17. package/dist/extensions/web-search/providers/tavily.d.ts +18 -0
  18. package/dist/extensions/web-search/providers/tavily.js +85 -0
  19. package/dist/extensions/web-search/providers/tavily.js.map +1 -0
  20. package/dist/extensions/web-search/rate-limit.d.ts +14 -0
  21. package/dist/extensions/web-search/rate-limit.js +24 -0
  22. package/dist/extensions/web-search/rate-limit.js.map +1 -0
  23. package/dist/extensions/web-search/safe-fetch.d.ts +54 -0
  24. package/dist/extensions/web-search/safe-fetch.js +172 -0
  25. package/dist/extensions/web-search/safe-fetch.js.map +1 -0
  26. package/dist/extensions/web-search/selection.d.ts +42 -0
  27. package/dist/extensions/web-search/selection.js +64 -0
  28. package/dist/extensions/web-search/selection.js.map +1 -0
  29. package/dist/extensions/web-search/tools.d.ts +13 -0
  30. package/dist/extensions/web-search/tools.js +136 -0
  31. package/dist/extensions/web-search/tools.js.map +1 -0
  32. package/dist/extensions/web-search/types.d.ts +65 -0
  33. package/dist/extensions/web-search/types.js +10 -0
  34. package/dist/extensions/web-search/types.js.map +1 -0
  35. package/dist/run.d.ts +27 -0
  36. package/dist/run.js +13 -0
  37. package/dist/run.js.map +1 -1
  38. package/dist/runner.js +29 -1
  39. package/dist/runner.js.map +1 -1
  40. package/dist/sandbox/gondolin.d.ts +13 -4
  41. package/dist/sandbox/gondolin.js +10 -3
  42. package/dist/sandbox/gondolin.js.map +1 -1
  43. package/package.json +1 -1
package/README.md CHANGED
@@ -42,8 +42,8 @@ single line you parse.
42
42
 
43
43
  Pi explicitly does not support MCP. agentic-pi ships a native Pi extension
44
44
  exposing **31 GitHub tools** ported from lastlight's `mcp-github-app`:
45
- clone/push, issues, PRs, reviews, labels, search. Tools are registered with
46
- the `github_` prefix to match opencode's MCP-server-name convention.
45
+ clone/push, issues, PRs, reviews, labels, search. Tool names are prefixed
46
+ with `github_`.
47
47
 
48
48
  Auth is opinionated: **GitHub App credentials preferred**, static
49
49
  `GITHUB_TOKEN` only as a low-trust fallback. JWT-minted installation tokens
@@ -81,38 +81,29 @@ The `extension_status` JSONL event always reports `status`, `reason`,
81
81
  `message`, `profile`, and `toolCount` so the orchestrator can log the
82
82
  outcome programmatically without parsing stderr.
83
83
 
84
- ### 5. Models named the way opencode names them
84
+ ### 5. Model selection
85
85
 
86
- `--model provider/id` accepts the exact string format opencode used
87
- (`openai/gpt-5.5`, `anthropic/claude-opus-4-5`, etc.). Credentials come from
88
- environment variables (`OPENAI_API_KEY`, `ANTHROPIC_API_KEY`,
89
- `OPENROUTER_API_KEY`) or Pi's `~/.pi/agent/auth.json` if you've logged in
90
- interactively. Provider/id mapping is delegated to `@earendil-works/pi-ai`'s
91
- `getModel()`.
86
+ `--model provider/id` (e.g. `anthropic/claude-opus-4-5`, `openai/gpt-4o`).
87
+ Credentials come from environment variables (`OPENAI_API_KEY`,
88
+ `ANTHROPIC_API_KEY`, `OPENROUTER_API_KEY`) or Pi's `~/.pi/agent/auth.json`
89
+ if you've logged in interactively. Provider/id mapping is delegated to
90
+ `@earendil-works/pi-ai`'s `getModel()`.
92
91
 
93
92
  `--thinking <level>` maps directly to Pi's `thinkingLevel`
94
93
  (`off`/`minimal`/`low`/`medium`/`high`/`xhigh`). Per-provider effort is
95
94
  handled by Pi.
96
95
 
97
- ### 6. Things accepted but ignored for caller-side compatibility
98
-
99
- - `--dangerously-skip-permissions` — Pi has no permission prompts to skip
100
- ("run in a container" is Pi's design stance). The flag is accepted so a
101
- caller that previously spawned opencode does not need to strip it.
102
- - `--variant <level>` — alias for `--thinking`.
103
-
104
- ### 7. Defaults that match a containerized sandbox
96
+ ### 6. Defaults that match a containerized sandbox
105
97
 
106
98
  - **`--no-session`** is intended to be the default in sandboxed runs (state
107
99
  lives outside the container).
108
100
  - **Built-in tools** (read, write, edit, bash, grep, find, ls) are enabled
109
101
  by default. Add `--no-builtin-tools` if you want a GitHub-only agent.
110
102
  - **`AGENTS.md`** in the working directory is auto-loaded as the agent's
111
- system prompt — same convention Pi and opencode share. Drop your
112
- workflow's `AGENTS.md` into the mounted workspace and the agent picks it
113
- up.
103
+ system prompt — same convention Pi uses. Drop your workflow's
104
+ `AGENTS.md` into the mounted workspace and the agent picks it up.
114
105
 
115
- ### 8. Optional micro-VM sandboxing via `--sandbox gondolin`
106
+ ### 7. Optional micro-VM sandboxing via `--sandbox gondolin`
116
107
 
117
108
  By default Pi's file and bash tools run on the host. Pass `--sandbox gondolin`
118
109
  and they get routed through a per-run [Gondolin](https://github.com/earendil-works/gondolin)
@@ -195,14 +186,73 @@ The **App PEM is never copied into the VM** — only the resulting token,
195
186
  which is short-lived. User-supplied `--sandbox-env GITHUB_TOKEN=…`
196
187
  overrides the auto-injected value if you need to scope down further.
197
188
 
189
+ ### 8. Safe web search via the `web-search` extension
190
+
191
+ agentic-pi can register two native Pi tools — `web_search` and `web_fetch` —
192
+ so the agent can do general-purpose research. Backed by a configurable
193
+ provider:
194
+
195
+ | Provider | API key env var | Native content extraction |
196
+ | --- | --- | --- |
197
+ | Tavily (default) | `TAVILY_API_KEY` | yes (search + extract) |
198
+ | Exa | `EXA_API_KEY` | yes (search + contents) |
199
+ | Brave Search | `BRAVE_SEARCH_API_KEY` | no — `web_fetch` falls back to a safe HTML→text extractor |
200
+
201
+ **Auto-enable.** When at least one API key env var is present, the
202
+ extension is configured automatically. With multiple keys set, priority is
203
+ **Tavily → Exa → Brave**; override with `--web-search-provider` or the
204
+ `WEB_SEARCH_PROVIDER` env var. Pass `--no-web-search` to suppress the
205
+ tools entirely.
206
+
207
+ **Host-process egress.** Both tools run in the agentic-pi process, **not**
208
+ inside the Gondolin guest. That means:
209
+
210
+ - The provider API host is **not** added to the Gondolin egress
211
+ allowlist, and the API key is **never** injected into the VM.
212
+ - Behavior is identical under `--sandbox=none`, `--sandbox=gondolin`, and
213
+ when agentic-pi itself is containerized. The host's own network policy
214
+ controls reachability to the provider API and to arbitrary http(s) URLs.
215
+
216
+ **Safety rails (built-in; only the per-run call budget is configurable in v1).**
217
+
218
+ | Rail | Default |
219
+ | --- | --- |
220
+ | URL scheme allowlist | `http`, `https` only (`web_fetch`) |
221
+ | Request timeout | 15 s |
222
+ | Max response bytes | 1 MiB (streamed, aborted on overflow) |
223
+ | Max redirects | 3 (scheme re-checked at each hop) |
224
+ | Content-type gate (`web_fetch`) | `text/*`, `application/(xhtml+xml\|xml\|json)` |
225
+ | Max search results | 10 (regardless of `max_results` arg) |
226
+ | Extracted text cap | ~200 KiB |
227
+ | HTML cleaning | `<script>`, `<style>`, `<noscript>`, `<iframe>`, comments stripped before extraction |
228
+ | Per-run call budget | 30 combined `web_search` + `web_fetch` calls (override with `--web-search-max-calls`) |
229
+
230
+ When the call budget is hit, further invocations return a structured
231
+ rate-limit error result so the agent can recover; the run is **not**
232
+ aborted.
233
+
234
+ **No SSRF blocking.** Loopback / private IP ranges are **not** blocked by
235
+ default. Operators who care should run agentic-pi behind their own
236
+ egress firewall.
237
+
238
+ **Event stream.** A second `extension_status` event mirrors GitHub's:
239
+
240
+ ```jsonl
241
+ {"type":"extension_status","extension":"web-search","status":"configured","provider":"tavily","toolCount":2,"maxCalls":30,"sessionId":"…","timestamp":"…"}
242
+ ```
243
+
244
+ When skipped (no keys / `--no-web-search`), `status: "skipped"` carries a
245
+ `reason` of `disabled-by-flag` or `no-credentials`. Misconfigurations
246
+ (explicit provider whose key is missing, or an unknown provider name)
247
+ surface as a warning before the run starts.
248
+
198
249
  ## When to use this
199
250
 
200
251
  - You have an orchestrator that calls a coding agent once per workflow
201
252
  phase, in a container, and parses a JSONL stream.
202
- - You used to call `opencode run --format json` and want a less-opaque
203
- replacement built on a more hackable substrate.
204
253
  - You need GitHub repo operations available to the agent without standing
205
254
  up an MCP server.
255
+ - You want safe, sandbox-mode-agnostic web search available to the agent.
206
256
 
207
257
  ## When **not** to use this
208
258
 
@@ -245,16 +295,19 @@ GITHUB_TOKEN=ghp_…
245
295
  | --- | --- |
246
296
  | `--model <provider/id>` | Required. e.g. `anthropic/claude-opus-4-5`, `openai/gpt-4o`. |
247
297
  | `--thinking <level>` | `off` \| `minimal` \| `low` \| `medium` \| `high` \| `xhigh`. |
248
- | `--variant <level>` | Alias for `--thinking`. |
249
298
  | `--profile <name>` | `read` \| `issues-write` \| `review-write` \| `repo-write`. Omit to disable GitHub tools entirely. |
250
299
  | `--cwd <path>` | Working directory for the agent. Default: `$PWD`. |
251
300
  | `--no-session` | Ephemeral run — do not persist session jsonl. Recommended in sandboxed containers. |
252
301
  | `--session-dir <path>` | Override session storage location. |
253
302
  | `--no-builtin-tools` | Disable Pi's `read,write,edit,bash,grep,find,ls`. |
254
303
  | `--tools <a,b,c>` | Explicit tool allowlist (combined with profile if set). |
255
- | `--sandbox <none\|gondolin>` | Route `read`/`write`/`edit`/`bash` through a sandbox backend. Default `none`. `gondolin` boots a QEMU micro-VM mounting cwd at `/workspace`. Requires QEMU on the host; native-only (not Docker-in-Docker). See section 8. |
304
+ | `--sandbox <none\|gondolin>` | Route `read`/`write`/`edit`/`bash` through a sandbox backend. Default `none`. `gondolin` boots a QEMU micro-VM mounting cwd at `/workspace`. Requires QEMU on the host; native-only (not Docker-in-Docker). See section 7. |
256
305
  | `--sandbox-env KEY=VAL` | Inject env var into the sandbox VM (repeatable). Ignored when `--sandbox=none`. Auto-injects a minted `GITHUB_TOKEN`/`GH_TOKEN` when `--profile` is also active. |
257
- | `--dangerously-skip-permissions` | Accepted for caller-side compatibility. No-op. |
306
+ | `--allow-host <host>` | Add host to the sandbox HTTP egress allowlist (repeatable). Ignored when `--sandbox=none`. |
307
+ | `--no-network` | Disable sandbox HTTP egress entirely. Ignored when `--sandbox=none`. |
308
+ | `--web-search-provider <p>` | Force web-search provider: `tavily` \| `brave` \| `exa`. Default: auto-detect by env. See section 8. |
309
+ | `--no-web-search` | Disable the web-search extension (no `web_search`/`web_fetch` tools). |
310
+ | `--web-search-max-calls <n>` | Cap combined `web_search` + `web_fetch` calls per run. Default: 30. |
258
311
 
259
312
  Reads the prompt from stdin. Emits JSONL on stdout. Exits 0 on `agent_end`,
260
313
  1 on fatal error.
@@ -265,6 +318,7 @@ Reads the prompt from stdin. Emits JSONL on stdout. Exits 0 on `agent_end`,
265
318
  {"type":"session","version":3,"id":"<uuid>","timestamp":"…","cwd":"…"}
266
319
  {"type":"sandbox_status","backend":"none","status":{"backend":"none"},"sessionId":"<uuid>","timestamp":"…"}
267
320
  {"type":"extension_status","extension":"github","status":"configured","profile":"read","toolCount":18,"sessionId":"<uuid>","timestamp":"…"}
321
+ {"type":"extension_status","extension":"web-search","status":"configured","provider":"tavily","toolCount":2,"maxCalls":30,"sessionId":"<uuid>","timestamp":"…"}
268
322
  {"type":"agent_start","sessionId":"<uuid>","timestamp":"…"}
269
323
  {"type":"turn_start","sessionId":"<uuid>","timestamp":"…"}
270
324
  {"type":"message_start","message":{…},"sessionId":"<uuid>","timestamp":"…"}
@@ -343,7 +397,8 @@ console.log(result.records.length); // full event log
343
397
  | `messages` | `unknown[]` | Full Pi message array from `agent_end`. |
344
398
  | `stats` | `{userMessages, assistantMessages, toolCalls, toolResults, tokens: {input, output, cacheRead, cacheWrite, total}, cost}` \| `undefined` | Token + cost rollup. |
345
399
  | `sandbox` | `{backend, status}` \| `undefined` | Mirror of the `sandbox_status` event. |
346
- | `github` | `{status, reason, profile, toolCount}` \| `undefined` | Mirror of the `extension_status` event. |
400
+ | `github` | `{status, reason, profile, toolCount}` \| `undefined` | Mirror of the GitHub `extension_status` event. |
401
+ | `webSearch` | `{status, reason, provider, toolCount, maxCalls}` \| `undefined` | Mirror of the web-search `extension_status` event. |
347
402
  | `records` | `EmitterRecord[]` | Every JSONL record in order. Same shape that the CLI writes. |
348
403
  | `warnings` | `string[]` | Warnings that would have gone to stderr in CLI mode. |
349
404
 
@@ -394,6 +449,7 @@ which walks `test/` for `*.test.ts`.
394
449
  | `test/models.test.ts` | `provider/id` parsing including openrouter triple-slash | — |
395
450
  | `test/extensions/github/profiles.test.ts` | Profile → tool allowlist (counts, superset structure, scope tiering) | — |
396
451
  | `test/extensions/github/credentials.test.ts` | `assertSafeToken` and `credentialsFilePath` validation | — |
452
+ | `test/extensions/web-search/*.test.ts` | Provider selection, extension wiring, safe-fetch rails, HTML extraction, rate limiter, per-provider normalization (all with injected `fetchImpl`) | — |
397
453
  | `test/sandbox/preflight.test.ts` | Preflight returns a structured ok\|error result | — |
398
454
  | `test/run.integration.test.ts` | Programmatic `run()`: RunResult populated, onEvent fires for every record, **child-process check confirms zero stdout/stderr leak from library** | `OPENAI_API_KEY` not set |
399
455
  | `test/run-sandbox.integration.test.ts` | `run({ sandbox: "gondolin" })` boots a VM, agent's `write` tool produces a host file via the mount | `OPENAI_API_KEY` not set OR QEMU/preflight unavailable |
package/dist/args.d.ts CHANGED
@@ -60,6 +60,22 @@ export interface RunConfig {
60
60
  * entirely. Ignored when `sandbox === "none"`.
61
61
  */
62
62
  allowedHttpHosts?: string[] | null;
63
+ /**
64
+ * Web-search extension toggle. Default: true (auto-enables when a
65
+ * provider API key env var is present). Pass `--no-web-search` to
66
+ * force-disable.
67
+ */
68
+ webSearch: boolean;
69
+ /**
70
+ * Explicit web-search provider. Overrides auto-detection by env var.
71
+ * Set via `--web-search-provider <tavily|brave|exa>`.
72
+ */
73
+ webSearchProvider?: string;
74
+ /**
75
+ * Per-run cap on combined web_search + web_fetch calls. Default: 30.
76
+ * Set via `--web-search-max-calls <n>`.
77
+ */
78
+ webSearchMaxCalls?: number;
63
79
  }
64
80
  export declare function printHelp(): void;
65
81
  export declare function parseArgs(argv: string[]): RunConfig;
package/dist/args.js CHANGED
@@ -34,6 +34,15 @@ Flags:
34
34
  Ignored when --sandbox=none.
35
35
  --no-network Disable HTTP egress from the sandbox entirely.
36
36
  Ignored when --sandbox=none.
37
+ --web-search-provider <p> Force a web-search provider: tavily | brave | exa.
38
+ Default: auto-detect from env (Tavily > Exa > Brave).
39
+ Provider's API key env var must be set:
40
+ TAVILY_API_KEY, EXA_API_KEY, or BRAVE_SEARCH_API_KEY.
41
+ --no-web-search Disable the web-search extension entirely
42
+ (web_search / web_fetch tools not registered).
43
+ --web-search-max-calls <n> Cap combined web_search + web_fetch calls per run.
44
+ Default: 30. When exceeded, further calls return a
45
+ structured error result.
37
46
  --sandbox-image <name> Image to boot when --sandbox=gondolin. Values:
38
47
  'default' (recommended) — bundled agentic-pi-dev image
39
48
  with git/gh/node/python/rust baked in (auto-downloaded).
@@ -54,6 +63,7 @@ export function parseArgs(argv) {
54
63
  noBuiltinTools: false,
55
64
  dangerouslySkipPermissions: false,
56
65
  sandbox: "none",
66
+ webSearch: true,
57
67
  };
58
68
  for (let i = 0; i < argv.length; i++) {
59
69
  const arg = argv[i];
@@ -147,6 +157,25 @@ export function parseArgs(argv) {
147
157
  case "--no-network":
148
158
  config.allowedHttpHosts = null;
149
159
  break;
160
+ case "--no-web-search":
161
+ config.webSearch = false;
162
+ break;
163
+ case "--web-search-provider": {
164
+ const v = next().trim();
165
+ if (!v)
166
+ throw new Error("--web-search-provider requires a value");
167
+ config.webSearchProvider = v;
168
+ break;
169
+ }
170
+ case "--web-search-max-calls": {
171
+ const v = next();
172
+ const n = Number(v);
173
+ if (!Number.isFinite(n) || Math.floor(n) !== n || n < 1) {
174
+ throw new Error(`--web-search-max-calls must be a positive integer (got '${v}')`);
175
+ }
176
+ config.webSearchMaxCalls = n;
177
+ break;
178
+ }
150
179
  case "-h":
151
180
  case "--help":
152
181
  printHelp();
package/dist/args.js.map CHANGED
@@ -1 +1 @@
1
- {"version":3,"file":"args.js","sourceRoot":"","sources":["../src/args.ts"],"names":[],"mappings":"AAAA;;;;;;GAMG;AA2DH,MAAM,UAAU,SAAS;IACvB,OAAO,CAAC,MAAM,CAAC,KAAK,CAAC;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;CAsCtB,CAAC,CAAC;AACH,CAAC;AAED,MAAM,UAAU,SAAS,CAAC,IAAc;IACtC,MAAM,MAAM,GAAc;QACxB,KAAK,EAAE,EAAE;QACT,GAAG,EAAE,OAAO,CAAC,GAAG,EAAE;QAClB,SAAS,EAAE,KAAK;QAChB,cAAc,EAAE,KAAK;QACrB,0BAA0B,EAAE,KAAK;QACjC,OAAO,EAAE,MAAM;KAChB,CAAC;IAEF,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,IAAI,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;QACrC,MAAM,GAAG,GAAG,IAAI,CAAC,CAAC,CAAC,CAAC;QACpB,MAAM,IAAI,GAAG,GAAW,EAAE;YACxB,MAAM,CAAC,GAAG,IAAI,CAAC,EAAE,CAAC,CAAC,CAAC;YACpB,IAAI,CAAC,KAAK,SAAS;gBAAE,MAAM,IAAI,KAAK,CAAC,QAAQ,GAAG,mBAAmB,CAAC,CAAC;YACrE,OAAO,CAAC,CAAC;QACX,CAAC,CAAC;QACF,QAAQ,GAAG,EAAE,CAAC;YACZ,KAAK,SAAS,CAAC;YACf,KAAK,IAAI;gBACP,MAAM,CAAC,KAAK,GAAG,IAAI,EAAE,CAAC;gBACtB,MAAM;YACR,KAAK,YAAY,CAAC;YAClB,KAAK,WAAW,CAAC,CAAC,CAAC;gBACjB,MAAM,CAAC,GAAG,IAAI,EAAE,CAAC;gBACjB,IAAI,CAAC,CAAC,KAAK,EAAE,SAAS,EAAE,KAAK,EAAE,QAAQ,EAAE,MAAM,EAAE,OAAO,CAAC,CAAC,QAAQ,CAAC,CAAC,CAAC,EAAE,CAAC;oBACtE,MAAM,IAAI,KAAK,CAAC,6BAA6B,CAAC,EAAE,CAAC,CAAC;gBACpD,CAAC;gBACD,MAAM,CAAC,QAAQ,GAAG,CAA0B,CAAC;gBAC7C,MAAM;YACR,CAAC;YACD,KAAK,WAAW;gBACd,MAAM,CAAC,OAAO,GAAG,IAAI,EAAE,CAAC;gBACxB,MAAM;YACR,KAAK,OAAO;gBACV,MAAM,CAAC,GAAG,GAAG,IAAI,EAAE,CAAC;gBACpB,MAAM;YACR,KAAK,cAAc;gBACjB,MAAM,CAAC,SAAS,GAAG,IAAI,CAAC;gBACxB,MAAM;YACR,KAAK,eAAe;gBAClB,MAAM,CAAC,UAAU,GAAG,IAAI,EAAE,CAAC;gBAC3B,MAAM;YACR,KAAK,oBAAoB;gBACvB,MAAM,CAAC,cAAc,GAAG,IAAI,CAAC;gBAC7B,MAAM;YACR,KAAK,SAAS;gBACZ,MAAM,CAAC,KAAK,GAAG,IAAI,EAAE,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,IAAI,EAAE,CAAC,CAAC,MAAM,CAAC,OAAO,CAAC,CAAC;gBACtE,MAAM;YACR,KAAK,gCAAgC;gBACnC,MAAM,CAAC,0BAA0B,GAAG,IAAI,CAAC;gBACzC,MAAM;YACR,KAAK,WAAW,CAAC,CAAC,CAAC;gBACjB,MAAM,CAAC,GAAG,IAAI,EAAE,CAAC;gBACjB,IAAI,CAAC,KAAK,MAAM,IAAI,CAAC,KAAK,UAAU,EAAE,CAAC;oBACrC,MAAM,IAAI,KAAK,CAAC,sBAAsB,CAAC,8BAA8B,CAAC,CAAC;gBACzE,CAAC;gBACD,MAAM,
CAAC,OAAO,GAAG,CAAC,CAAC;gBACnB,MAAM;YACR,CAAC;YACD,KAAK,iBAAiB,CAAC,CAAC,CAAC;gBACvB,MAAM,CAAC,GAAG,IAAI,EAAE,CAAC;gBACjB,IAAI,CAAC,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;oBACnB,MAAM,IAAI,KAAK,CAAC,4CAA4C,CAAC,CAAC;gBAChE,CAAC;gBACD,MAAM,CAAC,YAAY,GAAG,CAAC,CAAC;gBACxB,MAAM;YACR,CAAC;YACD,KAAK,eAAe,CAAC,CAAC,CAAC;gBACrB,MAAM,CAAC,GAAG,IAAI,EAAE,CAAC;gBACjB,MAAM,EAAE,GAAG,CAAC,CAAC,OAAO,CAAC,GAAG,CAAC,CAAC;gBAC1B,IAAI,EAAE,GAAG,CAAC,EAAE,CAAC;oBACX,MAAM,IAAI,KAAK,CAAC,uCAAuC,CAAC,IAAI,CAAC,CAAC;gBAChE,CAAC;gBACD,MAAM,GAAG,GAAG,CAAC,CAAC,KAAK,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC;gBAC3B,MAAM,GAAG,GAAG,CAAC,CAAC,KAAK,CAAC,EAAE,GAAG,CAAC,CAAC,CAAC;gBAC5B,IAAI,CAAC,0BAA0B,CAAC,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC;oBAC1C,MAAM,IAAI,KAAK,CAAC,6DAA6D,GAAG,IAAI,CAAC,CAAC;gBACxF,CAAC;gBACD,MAAM,CAAC,UAAU,GAAG,EAAE,GAAG,CAAC,MAAM,CAAC,UAAU,IAAI,EAAE,CAAC,EAAE,CAAC,GAAG,CAAC,EAAE,GAAG,EAAE,CAAC;gBACjE,MAAM;YACR,CAAC;YACD,KAAK,cAAc,CAAC,CAAC,CAAC;gBACpB,MAAM,CAAC,GAAG,IAAI,EAAE,CAAC,IAAI,EAAE,CAAC;gBACxB,IAAI,CAAC,CAAC;oBAAE,MAAM,IAAI,KAAK,CAAC,wCAAwC,CAAC,CAAC;gBAClE,IAAI,CAAC,oBAAoB,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC;oBAClC,MAAM,IAAI,KAAK,CAAC,6CAA6C,CAAC,IAAI,CAAC,CAAC;gBACtE,CAAC;gBACD,gEAAgE;gBAChE,gEAAgE;gBAChE,+DAA+D;gBAC/D,gEAAgE;gBAChE,kEAAkE;gBAClE,MAAM,GAAG,GAAG,KAAK,CAAC,OAAO,CAAC,MAAM,CAAC,gBAAgB,CAAC,CAAC,CAAC,CAAC,MAAM,CAAC,gBAAgB,CAAC,CAAC,CAAC,EAAE,CAAC;gBAClF,MAAM,CAAC,gBAAgB,GAAG,CAAC,GAAG,GAAG,EAAE,CAAC,CAAC,CAAC;gBACtC,MAAM;YACR,CAAC;YACD,KAAK,cAAc;gBACjB,MAAM,CAAC,gBAAgB,GAAG,IAAI,CAAC;gBAC/B,MAAM;YACR,KAAK,IAAI,CAAC;YACV,KAAK,QAAQ;gBACX,SAAS,EAAE,CAAC;gBACZ,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;YAClB;gBACE,MAAM,IAAI,KAAK,CAAC,iBAAiB,GAAG,EAAE,CAAC,CAAC;QAC5C,CAAC;IACH,CAAC;IAED,IAAI,CAAC,MAAM,CAAC,KAAK,EAAE,CAAC;QAClB,MAAM,IAAI,KAAK,CAAC,uDAAuD,CAAC,CAAC;IAC3E,CAAC;IACD,IAAI,CAAC,MAAM,CAAC,KAAK,CAAC,QAAQ,CAAC,GAAG,CAAC,EAAE,CAAC;QAChC,MAAM,IAAI,KAAK,CAAC,uCAAuC,MAAM,CAAC,KAAK,GAAG,CAAC,CAAC;IAC1E,CAAC;IAED,OAAO,MAAM,CAAC;AAChB,CAAC"}
1
+ {"version":3,"file":"args.js","sourceRoot":"","sources":["../src/args.ts"],"names":[],"mappings":"AAAA;;;;;;GAMG;AA2EH,MAAM,UAAU,SAAS;IACvB,OAAO,CAAC,MAAM,CAAC,KAAK,CAAC;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;CA+CtB,CAAC,CAAC;AACH,CAAC;AAED,MAAM,UAAU,SAAS,CAAC,IAAc;IACtC,MAAM,MAAM,GAAc;QACxB,KAAK,EAAE,EAAE;QACT,GAAG,EAAE,OAAO,CAAC,GAAG,EAAE;QAClB,SAAS,EAAE,KAAK;QAChB,cAAc,EAAE,KAAK;QACrB,0BAA0B,EAAE,KAAK;QACjC,OAAO,EAAE,MAAM;QACf,SAAS,EAAE,IAAI;KAChB,CAAC;IAEF,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,IAAI,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;QACrC,MAAM,GAAG,GAAG,IAAI,CAAC,CAAC,CAAC,CAAC;QACpB,MAAM,IAAI,GAAG,GAAW,EAAE;YACxB,MAAM,CAAC,GAAG,IAAI,CAAC,EAAE,CAAC,CAAC,CAAC;YACpB,IAAI,CAAC,KAAK,SAAS;gBAAE,MAAM,IAAI,KAAK,CAAC,QAAQ,GAAG,mBAAmB,CAAC,CAAC;YACrE,OAAO,CAAC,CAAC;QACX,CAAC,CAAC;QACF,QAAQ,GAAG,EAAE,CAAC;YACZ,KAAK,SAAS,CAAC;YACf,KAAK,IAAI;gBACP,MAAM,CAAC,KAAK,GAAG,IAAI,EAAE,CAAC;gBACtB,MAAM;YACR,KAAK,YAAY,CAAC;YAClB,KAAK,WAAW,CAAC,CAAC,CAAC;gBACjB,MAAM,CAAC,GAAG,IAAI,EAAE,CAAC;gBACjB,IAAI,CAAC,CAAC,KAAK,EAAE,SAAS,EAAE,KAAK,EAAE,QAAQ,EAAE,MAAM,EAAE,OAAO,CAAC,CAAC,QAAQ,CAAC,CAAC,CAAC,EAAE,CAAC;oBACtE,MAAM,IAAI,KAAK,CAAC,6BAA6B,CAAC,EAAE,CAAC,CAAC;gBACpD,CAAC;gBACD,MAAM,CAAC,QAAQ,GAAG,CAA0B,CAAC;gBAC7C,MAAM;YACR,CAAC;YACD,KAAK,WAAW;gBACd,MAAM,CAAC,OAAO,GAAG,IAAI,EAAE,CAAC;gBACxB,MAAM;YACR,KAAK,OAAO;gBACV,MAAM,CAAC,GAAG,GAAG,IAAI,EAAE,CAAC;gBACpB,MAAM;YACR,KAAK,cAAc;gBACjB,MAAM,CAAC,SAAS,GAAG,IAAI,CAAC;gBACxB,MAAM;YACR,KAAK,eAAe;gBAClB,MAAM,CAAC,UAAU,GAAG,IAAI,EAAE,CAAC;gBAC3B,MAAM;YACR,KAAK,oBAAoB;gBACvB,MAAM,CAAC,cAAc,GAAG,IAAI,CAAC;gBAC7B,MAAM;YACR,KAAK,SAAS;gBACZ,MAAM,CAAC,KAAK,GAAG,IAAI,EAAE,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,IAAI,EAAE,CAAC,CAAC,MAAM,CAAC,OAAO,CAAC,CAAC;gBACtE,MAAM;YACR,KAAK,gCAAgC;gBACnC,MAAM,CAAC,0BAA0B,GAAG,IAAI,CAAC;gBACzC,MAAM;YACR,KAAK,WAAW,CAAC,CAAC,CAAC;gBACjB,MAAM,CAAC,GAAG,IAAI,EAAE,CAAC;gBACjB,IAAI,CAAC,KAAK,MAAM,IAAI,CAAC,KAAK,UAAU,EAAE,CAAC;oBACrC,MAAM,IAAI,KAAK,CAAC,sBAAsB,CAAC,8BAA8B,CAAC
,CAAC;gBACzE,CAAC;gBACD,MAAM,CAAC,OAAO,GAAG,CAAC,CAAC;gBACnB,MAAM;YACR,CAAC;YACD,KAAK,iBAAiB,CAAC,CAAC,CAAC;gBACvB,MAAM,CAAC,GAAG,IAAI,EAAE,CAAC;gBACjB,IAAI,CAAC,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;oBACnB,MAAM,IAAI,KAAK,CAAC,4CAA4C,CAAC,CAAC;gBAChE,CAAC;gBACD,MAAM,CAAC,YAAY,GAAG,CAAC,CAAC;gBACxB,MAAM;YACR,CAAC;YACD,KAAK,eAAe,CAAC,CAAC,CAAC;gBACrB,MAAM,CAAC,GAAG,IAAI,EAAE,CAAC;gBACjB,MAAM,EAAE,GAAG,CAAC,CAAC,OAAO,CAAC,GAAG,CAAC,CAAC;gBAC1B,IAAI,EAAE,GAAG,CAAC,EAAE,CAAC;oBACX,MAAM,IAAI,KAAK,CAAC,uCAAuC,CAAC,IAAI,CAAC,CAAC;gBAChE,CAAC;gBACD,MAAM,GAAG,GAAG,CAAC,CAAC,KAAK,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC;gBAC3B,MAAM,GAAG,GAAG,CAAC,CAAC,KAAK,CAAC,EAAE,GAAG,CAAC,CAAC,CAAC;gBAC5B,IAAI,CAAC,0BAA0B,CAAC,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC;oBAC1C,MAAM,IAAI,KAAK,CAAC,6DAA6D,GAAG,IAAI,CAAC,CAAC;gBACxF,CAAC;gBACD,MAAM,CAAC,UAAU,GAAG,EAAE,GAAG,CAAC,MAAM,CAAC,UAAU,IAAI,EAAE,CAAC,EAAE,CAAC,GAAG,CAAC,EAAE,GAAG,EAAE,CAAC;gBACjE,MAAM;YACR,CAAC;YACD,KAAK,cAAc,CAAC,CAAC,CAAC;gBACpB,MAAM,CAAC,GAAG,IAAI,EAAE,CAAC,IAAI,EAAE,CAAC;gBACxB,IAAI,CAAC,CAAC;oBAAE,MAAM,IAAI,KAAK,CAAC,wCAAwC,CAAC,CAAC;gBAClE,IAAI,CAAC,oBAAoB,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC;oBAClC,MAAM,IAAI,KAAK,CAAC,6CAA6C,CAAC,IAAI,CAAC,CAAC;gBACtE,CAAC;gBACD,gEAAgE;gBAChE,gEAAgE;gBAChE,+DAA+D;gBAC/D,gEAAgE;gBAChE,kEAAkE;gBAClE,MAAM,GAAG,GAAG,KAAK,CAAC,OAAO,CAAC,MAAM,CAAC,gBAAgB,CAAC,CAAC,CAAC,CAAC,MAAM,CAAC,gBAAgB,CAAC,CAAC,CAAC,EAAE,CAAC;gBAClF,MAAM,CAAC,gBAAgB,GAAG,CAAC,GAAG,GAAG,EAAE,CAAC,CAAC,CAAC;gBACtC,MAAM;YACR,CAAC;YACD,KAAK,cAAc;gBACjB,MAAM,CAAC,gBAAgB,GAAG,IAAI,CAAC;gBAC/B,MAAM;YACR,KAAK,iBAAiB;gBACpB,MAAM,CAAC,SAAS,GAAG,KAAK,CAAC;gBACzB,MAAM;YACR,KAAK,uBAAuB,CAAC,CAAC,CAAC;gBAC7B,MAAM,CAAC,GAAG,IAAI,EAAE,CAAC,IAAI,EAAE,CAAC;gBACxB,IAAI,CAAC,CAAC;oBAAE,MAAM,IAAI,KAAK,CAAC,wCAAwC,CAAC,CAAC;gBAClE,MAAM,CAAC,iBAAiB,GAAG,CAAC,CAAC;gBAC7B,MAAM;YACR,CAAC;YACD,KAAK,wBAAwB,CAAC,CAAC,CAAC;gBAC9B,MAAM,CAAC,GAAG,IAAI,EAAE,CAAC;gBACjB,MAAM,CAAC,GAAG,MAAM,CAAC,CAAC,CAAC,CAAC;gBACpB,IAAI,CAAC,MAAM,CAAC,QAAQ,CAAC,CAAC,CAAC,IAAI,IAAI,CAAC,KAAK,CAAC
,CAAC,CAAC,KAAK,CAAC,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC;oBACxD,MAAM,IAAI,KAAK,CAAC,2DAA2D,CAAC,IAAI,CAAC,CAAC;gBACpF,CAAC;gBACD,MAAM,CAAC,iBAAiB,GAAG,CAAC,CAAC;gBAC7B,MAAM;YACR,CAAC;YACD,KAAK,IAAI,CAAC;YACV,KAAK,QAAQ;gBACX,SAAS,EAAE,CAAC;gBACZ,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;YAClB;gBACE,MAAM,IAAI,KAAK,CAAC,iBAAiB,GAAG,EAAE,CAAC,CAAC;QAC5C,CAAC;IACH,CAAC;IAED,IAAI,CAAC,MAAM,CAAC,KAAK,EAAE,CAAC;QAClB,MAAM,IAAI,KAAK,CAAC,uDAAuD,CAAC,CAAC;IAC3E,CAAC;IACD,IAAI,CAAC,MAAM,CAAC,KAAK,CAAC,QAAQ,CAAC,GAAG,CAAC,EAAE,CAAC;QAChC,MAAM,IAAI,KAAK,CAAC,uCAAuC,MAAM,CAAC,KAAK,GAAG,CAAC,CAAC;IAC1E,CAAC;IAED,OAAO,MAAM,CAAC;AAChB,CAAC"}
@@ -0,0 +1,18 @@
1
+ /**
2
+ * Minimal HTML → readable text extractor. No dependencies.
3
+ *
4
+ * Approach:
5
+ * 1. Strip <script>, <style>, <noscript>, <iframe>, and HTML comments
6
+ * so the agent never sees code or hidden trackers.
7
+ * 2. Replace block-level tags with newlines so paragraphs stay separated.
8
+ * 3. Drop all other tags.
9
+ * 4. Decode the common named entities and any &#NN; / &#xHH; numeric
10
+ * escapes.
11
+ * 5. Collapse runs of whitespace; cap output at MAX_BYTES.
12
+ *
13
+ * Not a Readability-style content-only extractor — for that, use Tavily or
14
+ * Exa's native extraction, which the provider clients invoke directly.
15
+ */
16
+ export declare const EXTRACT_MAX_BYTES: number;
17
+ export declare function extractTitle(html: string): string | undefined;
18
+ export declare function htmlToText(html: string, maxBytes?: number): string;
@@ -0,0 +1,110 @@
1
+ /**
2
+ * Minimal HTML → readable text extractor. No dependencies.
3
+ *
4
+ * Approach:
5
+ * 1. Strip <script>, <style>, <noscript>, <iframe>, and HTML comments
6
+ * so the agent never sees code or hidden trackers.
7
+ * 2. Replace block-level tags with newlines so paragraphs stay separated.
8
+ * 3. Drop all other tags.
9
+ * 4. Decode the common named entities and any &#NN; / &#xHH; numeric
10
+ * escapes.
11
+ * 5. Collapse runs of whitespace; cap output at MAX_BYTES.
12
+ *
13
+ * Not a Readability-style content-only extractor — for that, use Tavily or
14
+ * Exa's native extraction, which the provider clients invoke directly.
15
+ */
16
/** Default cap, in UTF-8 bytes, on the text returned by `htmlToText` (200 KiB). */
export const EXTRACT_MAX_BYTES = 200 * 1024;

/**
 * Named character references understood by the extractor, mapped to their
 * plain-text replacement.
 *
 * Kept in a Map rather than a plain object so lookups can never fall through
 * to `Object.prototype`: with an object literal, `&constructor;` or
 * `&toString;` would resolve to an inherited function and be stringified
 * into the output.
 */
const NAMED_ENTITIES = new Map([
  ["amp", "&"],
  ["lt", "<"],
  ["gt", ">"],
  ["quot", '"'],
  ["apos", "'"],
  ["nbsp", " "],
  ["copy", "(c)"],
  ["reg", "(R)"],
  ["trade", "(TM)"],
  ["hellip", "…"],
  ["mdash", "—"],
  ["ndash", "–"],
  ["lsquo", "‘"],
  ["rsquo", "’"],
  ["ldquo", "“"],
  ["rdquo", "”"],
]);

/**
 * Matches `&#xHH;` / `&#XHH;` (hex), `&#NN;` (decimal) and alphabetic named
 * references. Decimal references accept decimal digits only, so a malformed
 * `&#1a;` is left untouched instead of being decoded as U+0001.
 */
const ENTITY_PATTERN = /&(#[xX][0-9a-fA-F]+|#[0-9]+|[a-zA-Z]+);/g;

/** Tags whose open/close markers become a newline so paragraphs stay separated. */
const BLOCK_TAGS = new Set([
  "p", "br", "div", "section", "article", "header", "footer", "main",
  "nav", "aside", "ul", "ol", "li", "table", "tr", "td", "th",
  "h1", "h2", "h3", "h4", "h5", "h6", "blockquote", "hr", "pre", "code",
  "dl", "dt", "dd", "figure", "figcaption",
]);

/**
 * Convert a code point to a string, returning "" for anything that is not a
 * valid Unicode code point (negative, above U+10FFFF, or non-integer).
 *
 * @param {number} code - Candidate code point.
 * @returns {string} The character, or "" when `code` is out of range.
 */
function safeFromCodePoint(code) {
  if (!Number.isInteger(code) || code < 0 || code > 0x10ffff) {
    return "";
  }
  return String.fromCodePoint(code);
}

/**
 * Decode numeric and known named character references in `s`.
 * Unknown named references are left verbatim.
 *
 * @param {string} s - Text that may contain `&…;` references.
 * @returns {string} Text with recognised references replaced.
 */
function decodeEntities(s) {
  return s.replace(ENTITY_PATTERN, (match, body) => {
    if (body[0] !== "#") {
      return NAMED_ENTITIES.get(body) ?? match;
    }
    const isHex = body[1] === "x" || body[1] === "X";
    const code = isHex
      ? Number.parseInt(body.slice(2), 16)
      : Number.parseInt(body.slice(1), 10);
    // An absurdly long digit run can overflow to Infinity; keep it verbatim.
    return Number.isFinite(code) ? safeFromCodePoint(code) : match;
  });
}

/**
 * Return the longest prefix of `bytes` that is at most `maxBytes` long and
 * does not end in the middle of a multi-byte UTF-8 sequence.
 *
 * @param {Uint8Array} bytes - Valid UTF-8 data.
 * @param {number} maxBytes - Byte budget; NaN and negative values mean 0.
 * @returns {Uint8Array} A view onto the retained prefix.
 */
function truncateUtf8(bytes, maxBytes) {
  const limit = Number.isNaN(maxBytes) ? 0 : Math.max(0, Math.floor(maxBytes));
  let end = Math.min(limit, bytes.byteLength);
  // If the first excluded byte is a continuation byte (10xxxxxx), the cut
  // falls inside a character: back up to that character's lead byte so the
  // whole character is dropped rather than decoded as U+FFFD.
  while (end > 0 && end < bytes.byteLength && (bytes[end] & 0xc0) === 0x80) {
    end -= 1;
  }
  return bytes.subarray(0, end);
}

/**
 * Extract the document title from the first `<title>` element.
 *
 * @param {string} html - Raw HTML.
 * @returns {string | undefined} Entity-decoded, whitespace-collapsed title,
 *   or `undefined` when there is no `<title>` or it is blank.
 */
export function extractTitle(html) {
  const found = /<title[^>]*>([\s\S]*?)<\/title>/i.exec(html);
  if (!found) {
    return undefined;
  }
  const title = decodeEntities(found[1]).replace(/\s+/g, " ").trim();
  return title.length > 0 ? title : undefined;
}

/**
 * Reduce an HTML document to readable plain text.
 *
 * @param {string} html - Raw HTML.
 * @param {number} [maxBytes=EXTRACT_MAX_BYTES] - Maximum UTF-8 size of the
 *   result. Truncation happens on a character boundary, so the result never
 *   exceeds the cap and never ends in a replacement character.
 * @returns {string} Extracted text, with blocks separated by newlines.
 */
export function htmlToText(html, maxBytes = EXTRACT_MAX_BYTES) {
  // 1. Strip dangerous / noise sections wholesale. Comments go first so a
  //    commented-out <script> cannot confuse the element patterns below.
  let s = html.replace(/<!--[\s\S]*?-->/g, "");
  s = s.replace(/<script\b[\s\S]*?<\/script\s*>/gi, "");
  s = s.replace(/<style\b[\s\S]*?<\/style\s*>/gi, "");
  s = s.replace(/<noscript\b[\s\S]*?<\/noscript\s*>/gi, "");
  s = s.replace(/<iframe\b[\s\S]*?<\/iframe\s*>/gi, "");
  // The tag pattern below only matches names starting with a letter, so the
  // doctype declaration must be removed explicitly or it leaks into the text.
  s = s.replace(/<!doctype\b[^>]*>/gi, "");

  // 2. Block-level tags become newlines; every other tag is dropped.
  s = s.replace(/<\/?([a-zA-Z][a-zA-Z0-9]*)\b[^>]*>/g, (_match, tag) =>
    BLOCK_TAGS.has(tag.toLowerCase()) ? "\n" : "",
  );

  // 3. Decode entities only after tags are gone, so an escaped "&lt;b&gt;"
  //    survives as literal text instead of being treated as markup.
  s = decodeEntities(s);

  // 4. Normalize whitespace: unify line endings, collapse runs of
  //    spaces/tabs, trim each line, and squeeze runs of blank lines to one.
  s = s
    .replace(/\r\n?/g, "\n")
    .split("\n")
    .map((line) => line.replace(/[ \t\f\v]+/g, " ").trim())
    .filter((line, i, lines) => line !== "" || lines[i - 1] !== "")
    .join("\n")
    .trim();

  // 5. Byte cap, measured on the UTF-8 encoding.
  const bytes = new TextEncoder().encode(s);
  if (bytes.byteLength <= maxBytes) {
    return s;
  }
  return new TextDecoder("utf-8", { fatal: false }).decode(
    truncateUtf8(bytes, maxBytes),
  );
}
110
+ //# sourceMappingURL=extract.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"extract.js","sourceRoot":"","sources":["../../../src/extensions/web-search/extract.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;GAcG;AAEH,MAAM,CAAC,MAAM,iBAAiB,GAAG,GAAG,GAAG,IAAI,CAAC;AAE5C,MAAM,cAAc,GAA2B;IAC7C,GAAG,EAAE,GAAG;IACR,EAAE,EAAE,GAAG;IACP,EAAE,EAAE,GAAG;IACP,IAAI,EAAE,GAAG;IACT,IAAI,EAAE,GAAG;IACT,IAAI,EAAE,GAAG;IACT,IAAI,EAAE,KAAK;IACX,GAAG,EAAE,KAAK;IACV,KAAK,EAAE,MAAM;IACb,MAAM,EAAE,GAAG;IACX,KAAK,EAAE,GAAG;IACV,KAAK,EAAE,GAAG;IACV,KAAK,EAAE,GAAG;IACV,KAAK,EAAE,GAAG;IACV,KAAK,EAAE,GAAG;IACV,KAAK,EAAE,GAAG;CACX,CAAC;AAEF,SAAS,cAAc,CAAC,CAAS;IAC/B,OAAO,CAAC,CAAC,OAAO,CAAC,gCAAgC,EAAE,CAAC,EAAE,EAAE,IAAY,EAAE,EAAE;QACtE,IAAI,IAAI,CAAC,UAAU,CAAC,IAAI,CAAC,IAAI,IAAI,CAAC,UAAU,CAAC,IAAI,CAAC,EAAE,CAAC;YACnD,MAAM,IAAI,GAAG,QAAQ,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC;YACzC,OAAO,MAAM,CAAC,QAAQ,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,iBAAiB,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,EAAE,CAAC;QAC9D,CAAC;QACD,IAAI,IAAI,CAAC,UAAU,CAAC,GAAG,CAAC,EAAE,CAAC;YACzB,MAAM,IAAI,GAAG,QAAQ,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC;YACzC,OAAO,MAAM,CAAC,QAAQ,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,iBAAiB,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,EAAE,CAAC;QAC9D,CAAC;QACD,MAAM,MAAM,GAAG,cAAc,CAAC,IAAI,CAAC,CAAC;QACpC,OAAO,MAAM,IAAI,EAAE,CAAC;IACtB,CAAC,CAAC,CAAC;AACL,CAAC;AAED,SAAS,iBAAiB,CAAC,IAAY;IACrC,IAAI,IAAI,GAAG,CAAC,IAAI,IAAI,GAAG,QAAQ;QAAE,OAAO,EAAE,CAAC;IAC3C,IAAI,CAAC;QACH,OAAO,MAAM,CAAC,aAAa,CAAC,IAAI,CAAC,CAAC;IACpC,CAAC;IAAC,MAAM,CAAC;QACP,OAAO,EAAE,CAAC;IACZ,CAAC;AACH,CAAC;AAED,MAAM,UAAU,GAAG,IAAI,GAAG,CAAC;IACzB,GAAG,EAAE,IAAI,EAAE,KAAK,EAAE,SAAS,EAAE,SAAS,EAAE,QAAQ,EAAE,QAAQ,EAAE,MAAM;IAClE,KAAK,EAAE,OAAO,EAAE,IAAI,EAAE,IAAI,EAAE,IAAI,EAAE,OAAO,EAAE,IAAI,EAAE,IAAI,EAAE,IAAI;IAC3D,IAAI,EAAE,IAAI,EAAE,IAAI,EAAE,IAAI,EAAE,IAAI,EAAE,IAAI,EAAE,YAAY,EAAE,IAAI,EAAE,KAAK,EAAE,MAAM;IACrE,IAAI,EAAE,IAAI,EAAE,IAAI,EAAE,QAAQ,EAAE,YAAY;CACzC,CAAC,CAAC;AAEH,MAAM,UAAU,YAAY,CAAC,IAAY;IACvC,MAAM,CAAC,GAAG,kCAAkC,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;IACxD,IAAI,CAAC,CAAC;QAA
E,OAAO,SAAS,CAAC;IACzB,MAAM,CAAC,GAAG,cAAc,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,OAAO,CAAC,MAAM,EAAE,GAAG,CAAC,CAAC,IAAI,EAAE,CAAC;IAC3D,OAAO,CAAC,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,SAAS,CAAC;AACtC,CAAC;AAED,MAAM,UAAU,UAAU,CAAC,IAAY,EAAE,QAAQ,GAAG,iBAAiB;IACnE,iDAAiD;IACjD,IAAI,CAAC,GAAG,IAAI,CAAC,OAAO,CAAC,kBAAkB,EAAE,EAAE,CAAC,CAAC;IAC7C,CAAC,GAAG,CAAC,CAAC,OAAO,CAAC,kCAAkC,EAAE,EAAE,CAAC,CAAC;IACtD,CAAC,GAAG,CAAC,CAAC,OAAO,CAAC,gCAAgC,EAAE,EAAE,CAAC,CAAC;IACpD,CAAC,GAAG,CAAC,CAAC,OAAO,CAAC,sCAAsC,EAAE,EAAE,CAAC,CAAC;IAC1D,CAAC,GAAG,CAAC,CAAC,OAAO,CAAC,kCAAkC,EAAE,EAAE,CAAC,CAAC;IAEtD,4EAA4E;IAC5E,CAAC,GAAG,CAAC,CAAC,OAAO,CAAC,qCAAqC,EAAE,CAAC,MAAM,EAAE,GAAW,EAAE,EAAE;QAC3E,IAAI,UAAU,CAAC,GAAG,CAAC,GAAG,CAAC,WAAW,EAAE,CAAC;YAAE,OAAO,IAAI,CAAC;QACnD,OAAO,EAAE,CAAC;IACZ,CAAC,CAAC,CAAC;IAEH,sBAAsB;IACtB,CAAC,GAAG,cAAc,CAAC,CAAC,CAAC,CAAC;IAEtB,wEAAwE;IACxE,mCAAmC;IACnC,CAAC,GAAG,CAAC,CAAC,OAAO,CAAC,QAAQ,EAAE,IAAI,CAAC,CAAC;IAC9B,CAAC,GAAG,CAAC;SACF,KAAK,CAAC,IAAI,CAAC;SACX,GAAG,CAAC,CAAC,IAAI,EAAE,EAAE,CAAC,IAAI,CAAC,OAAO,CAAC,aAAa,EAAE,GAAG,CAAC,CAAC,IAAI,EAAE,CAAC;SACtD,MAAM,CAAC,CAAC,IAAI,EAAE,CAAC,EAAE,GAAG,EAAE,EAAE;QACvB,oCAAoC;QACpC,IAAI,IAAI,KAAK,EAAE;YAAE,OAAO,IAAI,CAAC;QAC7B,OAAO,GAAG,CAAC,CAAC,GAAG,CAAC,CAAC,KAAK,EAAE,CAAC;IAC3B,CAAC,CAAC;SACD,IAAI,CAAC,IAAI,CAAC;SACV,IAAI,EAAE,CAAC;IAEV,wEAAwE;IACxE,4CAA4C;IAC5C,MAAM,OAAO,GAAG,IAAI,WAAW,EAAE,CAAC;IAClC,MAAM,KAAK,GAAG,OAAO,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC;IAChC,IAAI,KAAK,CAAC,UAAU,IAAI,QAAQ;QAAE,OAAO,CAAC,CAAC;IAC3C,0BAA0B;IAC1B,OAAO,IAAI,WAAW,CAAC,OAAO,EAAE,EAAE,KAAK,EAAE,KAAK,EAAE,CAAC,CAAC,MAAM,CACtD,KAAK,CAAC,QAAQ,CAAC,CAAC,EAAE,QAAQ,CAAC,CAC5B,CAAC;AACJ,CAAC"}
@@ -0,0 +1,43 @@
1
+ /**
2
+ * Web-search extension entry point.
3
+ *
4
+ * Mirrors `src/extensions/github/index.ts`:
5
+ * - silent skip when no provider is selected / no key present
6
+ * - loud (warning-worthy) skip when the user explicitly asked for a
7
+ * provider but didn't supply its key, or set an unknown name
8
+ * - on success, hands a typed customTools list back to the runner
9
+ *
10
+ * Selection logic lives in `selection.ts`; this module wires the chosen
11
+ * provider to its tool builder and a per-run RateLimiter.
12
+ */
13
+ import type { ToolDefinition } from "@earendil-works/pi-coding-agent";
14
+ import type { ProviderName, WebSearchSkipReason } from "./types.js";
15
+ export declare const DEFAULT_MAX_CALLS = 30;
16
+ export interface WebSearchExtensionConfig {
17
+ /** When false, the extension is force-skipped (disabled-by-flag). Default: true (applied when the value is null or undefined). */
18
+ webSearch?: boolean;
19
+ /** Explicit provider override. The loader's provider switch handles "tavily", "brave" and "exa". */
20
+ webSearchProvider?: string;
21
+ /** Per-run call budget shared across web_search + web_fetch. Default: 30. Missing or non-finite values fall back to the default; other values are floored and clamped to 1..1000. */
22
+ webSearchMaxCalls?: number;
23
+ /** Env override (defaults to process.env). Injected by tests. */
24
+ env?: Record<string, string | undefined>;
25
+ }
26
+ export interface WebSearchExtensionResult {
27
+ /** Tools to merge into createAgentSession({ customTools }). Empty when status is "skipped". */
28
+ customTools: ToolDefinition<any>[];
29
+ toolNames: string[]; // The `name` of each entry in customTools, in the same order; empty when skipped.
30
+ status: "configured" | "skipped"; // "configured" results carry the built tools; "skipped" results carry none.
31
+ reason?: WebSearchSkipReason; // Copied from the provider selection on skipped results; not set on configured results.
32
+ message?: string; // Copied from the provider selection for both outcomes.
33
+ provider?: ProviderName; // Provider reported by the selection step, passed through for both outcomes.
34
+ /** The cap actually enforced (echoed for observability). Only set on configured results. */
35
+ maxCalls?: number;
36
+ }
37
+ /** Runs provider selection for `config` and returns either a "skipped" result (no tools) or a "configured" result carrying the built tools and the enforced call cap. */ export declare function loadWebSearchExtension(config?: WebSearchExtensionConfig): WebSearchExtensionResult;
38
+ /**
39
+ * True if the skip is something the user almost certainly wants surfaced
40
+ * as a warning (vs. the silent "no keys set" case).
41
+ */
42
+ export declare function isMisconfigurationSkip(result: WebSearchExtensionResult): boolean;
43
+ export type { ProviderName, WebSearchSkipReason } from "./types.js";
@@ -0,0 +1,86 @@
1
+ /**
2
+ * Web-search extension entry point.
3
+ *
4
+ * Mirrors `src/extensions/github/index.ts`:
5
+ * - silent skip when no provider is selected / no key present
6
+ * - loud (warning-worthy) skip when the user explicitly asked for a
7
+ * provider but didn't supply its key, or set an unknown name
8
+ * - on success, hands a typed customTools list back to the runner
9
+ *
10
+ * Selection logic lives in `selection.ts`; this module wires the chosen
11
+ * provider to its tool builder and a per-run RateLimiter.
12
+ */
13
+ import { createBraveProvider } from "./providers/brave.js";
14
+ import { createExaProvider } from "./providers/exa.js";
15
+ import { createTavilyProvider } from "./providers/tavily.js";
16
+ import { RateLimiter } from "./rate-limit.js";
17
+ import { selectProvider } from "./selection.js";
18
+ import { buildWebSearchTools } from "./tools.js";
19
+ export const DEFAULT_MAX_CALLS = 30;
20
/**
 * Resolves which web-search provider (if any) applies to this run and, when
 * one does, builds its tools behind a shared per-run rate limiter.
 *
 * @param {object} [config] - Extension configuration; every field is optional.
 * @returns {object} A "skipped" result (empty tool lists plus the selection's
 *   reason/message/provider) or a "configured" result (tools, their names,
 *   provider, message and the enforced `maxCalls`).
 */
export function loadWebSearchExtension(config = {}) {
    const selection = selectProvider({
        webSearch: config.webSearch ?? true,
        webSearchProvider: config.webSearchProvider,
        env: config.env ?? process.env,
    });

    // Nothing to wire up: report why, with empty tool lists.
    if (selection.status === "skipped") {
        const { reason, message, provider } = selection;
        return {
            customTools: [],
            toolNames: [],
            status: "skipped",
            reason,
            message,
            provider,
        };
    }

    const activeProvider = instantiateProvider(selection.provider, selection.apiKey);
    const maxCalls = clampMaxCalls(config.webSearchMaxCalls);
    // One limiter instance is handed to the tool builder for the whole run.
    const customTools = buildWebSearchTools(activeProvider, new RateLimiter(maxCalls));

    return {
        customTools,
        toolNames: customTools.map(({ name }) => name),
        status: "configured",
        provider: selection.provider,
        message: selection.message,
        maxCalls,
    };
}
51
/**
 * Decides whether a skipped load deserves a warning instead of silence.
 *
 * A skip counts as a misconfiguration when its reason is "invalid-config",
 * or when it is "no-credentials" and a provider is recorded on the result
 * (an explicit provider was requested but its key is missing). Anything
 * else, including every non-skipped result, is not a misconfiguration.
 *
 * @param {object} result - Value returned by loadWebSearchExtension.
 * @returns {boolean} True when the skip should be surfaced as a warning.
 */
export function isMisconfigurationSkip(result) {
    const { status, reason, provider } = result;
    if (status !== "skipped") {
        return false;
    }
    switch (reason) {
        case "invalid-config":
            return true;
        case "no-credentials":
            // Louder than the generic "no creds at all" case only when a
            // specific provider was named.
            return provider !== undefined;
        default:
            return false;
    }
}
66
/**
 * Builds the provider implementation for an already-selected provider name.
 *
 * @param {string} name - Provider identifier: "tavily", "brave" or "exa".
 * @param {string} apiKey - API key passed to the provider factory.
 * @returns {object} Whatever the matching provider factory returns.
 * @throws {Error} When `name` is not one of the supported identifiers.
 */
function instantiateProvider(name, apiKey) {
    switch (name) {
        case "tavily":
            return createTavilyProvider({ apiKey });
        case "brave":
            return createBraveProvider({ apiKey });
        case "exa":
            return createExaProvider({ apiKey });
        default:
            // Without this branch an unexpected name yields `undefined`, which
            // only fails later, far from the cause, when the tools are built.
            throw new Error(`web-search: unsupported provider "${String(name)}"`);
    }
}
76
/**
 * Normalizes the configured per-run call budget.
 *
 * @param {number|undefined} v - Requested budget.
 * @returns {number} DEFAULT_MAX_CALLS when `v` is undefined or not a finite
 *   number; otherwise `v` rounded down and limited to the range 1..1000.
 */
function clampMaxCalls(v) {
    const usable = v !== undefined && Number.isFinite(v);
    if (!usable) {
        return DEFAULT_MAX_CALLS;
    }
    const lowerBound = 1;
    const upperBound = 1000;
    return Math.min(upperBound, Math.max(lowerBound, Math.floor(v)));
}
86
+ //# sourceMappingURL=index.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"index.js","sourceRoot":"","sources":["../../../src/extensions/web-search/index.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;GAWG;AAIH,OAAO,EAAE,mBAAmB,EAAE,MAAM,sBAAsB,CAAC;AAC3D,OAAO,EAAE,iBAAiB,EAAE,MAAM,oBAAoB,CAAC;AACvD,OAAO,EAAE,oBAAoB,EAAE,MAAM,uBAAuB,CAAC;AAC7D,OAAO,EAAE,WAAW,EAAE,MAAM,iBAAiB,CAAC;AAC9C,OAAO,EAAE,cAAc,EAAuB,MAAM,gBAAgB,CAAC;AACrE,OAAO,EAAE,mBAAmB,EAAE,MAAM,YAAY,CAAC;AAGjD,MAAM,CAAC,MAAM,iBAAiB,GAAG,EAAE,CAAC;AA0BpC,MAAM,UAAU,sBAAsB,CACpC,SAAmC,EAAE;IAErC,MAAM,GAAG,GAAG,MAAM,CAAC,GAAG,IAAK,OAAO,CAAC,GAA0C,CAAC;IAC9E,MAAM,KAAK,GAAmB;QAC5B,SAAS,EAAE,MAAM,CAAC,SAAS,IAAI,IAAI;QACnC,iBAAiB,EAAE,MAAM,CAAC,iBAAiB;QAC3C,GAAG;KACJ,CAAC;IAEF,MAAM,SAAS,GAAG,cAAc,CAAC,KAAK,CAAC,CAAC;IACxC,IAAI,SAAS,CAAC,MAAM,KAAK,SAAS,EAAE,CAAC;QACnC,OAAO;YACL,WAAW,EAAE,EAAE;YACf,SAAS,EAAE,EAAE;YACb,MAAM,EAAE,SAAS;YACjB,MAAM,EAAE,SAAS,CAAC,MAAM;YACxB,OAAO,EAAE,SAAS,CAAC,OAAO;YAC1B,QAAQ,EAAE,SAAS,CAAC,QAAQ;SAC7B,CAAC;IACJ,CAAC;IAED,MAAM,QAAQ,GAAG,mBAAmB,CAAC,SAAS,CAAC,QAAQ,EAAE,SAAS,CAAC,MAAM,CAAC,CAAC;IAC3E,MAAM,QAAQ,GAAG,aAAa,CAAC,MAAM,CAAC,iBAAiB,CAAC,CAAC;IACzD,MAAM,OAAO,GAAG,IAAI,WAAW,CAAC,QAAQ,CAAC,CAAC;IAC1C,MAAM,KAAK,GAAG,mBAAmB,CAAC,QAAQ,EAAE,OAAO,CAAC,CAAC;IAErD,OAAO;QACL,WAAW,EAAE,KAAK;QAClB,SAAS,EAAE,KAAK,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,IAAI,CAAC;QACnC,MAAM,EAAE,YAAY;QACpB,QAAQ,EAAE,SAAS,CAAC,QAAQ;QAC5B,OAAO,EAAE,SAAS,CAAC,OAAO;QAC1B,QAAQ;KACT,CAAC;AACJ,CAAC;AAED;;;GAGG;AACH,MAAM,UAAU,sBAAsB,CAAC,MAAgC;IACrE,IAAI,MAAM,CAAC,MAAM,KAAK,SAAS;QAAE,OAAO,KAAK,CAAC;IAC9C,IAAI,MAAM,CAAC,MAAM,KAAK,gBAAgB;QAAE,OAAO,IAAI,CAAC;IACpD,oEAAoE;IACpE,0BAA0B;IAC1B,IAAI,MAAM,CAAC,MAAM,KAAK,gBAAgB,IAAI,MAAM,CAAC,QAAQ,KAAK,SAAS;QAAE,OAAO,IAAI,CAAC;IACrF,OAAO,KAAK,CAAC;AACf,CAAC;AAED,SAAS,mBAAmB,CAAC,IAAkB,EAAE,MAAc;IAC7D,QAAQ,IAAI,EAAE,CAAC;QACb,KAAK,QAAQ;YACX,OAAO,oBAAoB,CAAC,EAAE,MAAM,EAAE,CAAC,CAAC;QAC1C,KAAK,OAAO;YACV,OAAO,mBAAmB,CAAC,EAAE,MAAM,EAAE,CAAC,CAAC;QACzC,KAAK,KAAK;YACR,OAAO,iBAAiB,CAAC,EAAE,MAAM,EAAE,CAAC,CAAC;IACzC,CAAC;AACH,CAA
C;AAED,SAAS,aAAa,CAAC,CAAqB;IAC1C,IAAI,CAAC,KAAK,SAAS,IAAI,CAAC,MAAM,CAAC,QAAQ,CAAC,CAAC,CAAC;QAAE,OAAO,iBAAiB,CAAC;IACrE,MAAM,CAAC,GAAG,IAAI,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC;IACxB,IAAI,CAAC,GAAG,CAAC;QAAE,OAAO,CAAC,CAAC;IACpB,IAAI,CAAC,GAAG,IAAI;QAAE,OAAO,IAAI,CAAC;IAC1B,OAAO,CAAC,CAAC;AACX,CAAC"}
@@ -0,0 +1,21 @@
1
+ /**
2
+ * Brave Search provider. https://api.search.brave.com/app/documentation
3
+ *
4
+ * Endpoint used:
5
+ * GET https://api.search.brave.com/res/v1/web/search?q=…&count=…
6
+ * Header: X-Subscription-Token: <key>
7
+ *
8
+ * Brave has no content-extraction endpoint, so this provider has no
9
+ * `fetch()` method — the tool layer falls back to safeFetch + the HTML
10
+ * extractor for `web_fetch`.
11
+ *
12
+ * `include_domains` / `exclude_domains` are honored via client-side
13
+ * post-filtering so the tool's schema behaves uniformly across providers.
14
+ */
15
+ import type { FetchImpl, Provider } from "../types.js";
16
+ export interface BraveOptions {
17
+ apiKey: string; // Sent on every search request as the "x-subscription-token" header.
18
+ fetchImpl?: FetchImpl; // Fetch function used for requests; defaults to globalThis.fetch.
19
+ baseUrl?: string; // API root; defaults to "https://api.search.brave.com/res/v1" and "/web/search" is appended to it.
20
+ }
21
+ /** Creates the "brave" provider: a search-only provider (supportsExtractedContent is false) whose search() issues a GET to `${baseUrl}/web/search`. */ export declare function createBraveProvider(options: BraveOptions): Provider;
@@ -0,0 +1,73 @@
1
+ /**
2
+ * Brave Search provider. https://api.search.brave.com/app/documentation
3
+ *
4
+ * Endpoint used:
5
+ * GET https://api.search.brave.com/res/v1/web/search?q=…&count=…
6
+ * Header: X-Subscription-Token: <key>
7
+ *
8
+ * Brave has no content-extraction endpoint, so this provider has no
9
+ * `fetch()` method — the tool layer falls back to safeFetch + the HTML
10
+ * extractor for `web_fetch`.
11
+ *
12
+ * `include_domains` / `exclude_domains` are honored via client-side
13
+ * post-filtering so the tool's schema behaves uniformly across providers.
14
+ */
15
/**
 * Tests whether a URL's host is the given domain or one of its subdomains.
 *
 * The pattern is normalized before comparison: surrounding whitespace is
 * trimmed, case is ignored, and a leading "*." or "." as well as a trailing
 * "." are dropped, so "*.example.com", ".example.com" and "example.com" all
 * behave the same. A trailing dot on the URL's host (fully-qualified form) is
 * ignored too.
 *
 * @param {string} url - Absolute URL of a search result.
 * @param {string} pattern - Domain to match, e.g. "example.com".
 * @returns {boolean} True on an exact or subdomain match. False when the URL
 *   cannot be parsed, or when the pattern is empty or not a string.
 */
function hostMatches(url, pattern) {
    if (typeof pattern !== "string") {
        return false;
    }
    const domain = pattern
        .trim()
        .toLowerCase()
        .replace(/^(?:\*\.|\.)/, "")
        .replace(/\.$/, "");
    // An empty pattern must never match; otherwise `endsWith(".")` would
    // accept every host written with a trailing dot.
    if (domain === "") {
        return false;
    }
    let host;
    try {
        host = new URL(url).hostname.toLowerCase().replace(/\.$/, "");
    }
    catch {
        // Unparseable URLs simply do not match any domain.
        return false;
    }
    // The "." prefix keeps "notexample.com" from matching "example.com".
    return host === domain || host.endsWith(`.${domain}`);
}
26
/**
 * Creates the Brave Search provider.
 *
 * The returned provider only implements `search()`; it advertises
 * `supportsExtractedContent: false`.
 *
 * @param {object} options
 * @param {string} options.apiKey - Sent as the "x-subscription-token" header.
 * @param {Function} [options.fetchImpl] - Fetch function; defaults to globalThis.fetch.
 * @param {string} [options.baseUrl] - API root; defaults to the public Brave endpoint.
 * @returns {object} Provider with `name`, `supportsExtractedContent` and `search(params)`.
 */
export function createBraveProvider(options) {
    const fetchImpl = options.fetchImpl ?? globalThis.fetch;
    const baseUrl = options.baseUrl ?? "https://api.search.brave.com/res/v1";
    // Largest `count` this provider ever asks Brave for.
    const maxCount = 20;
    return {
        name: "brave",
        supportsExtractedContent: false,
        /**
         * Runs one web search and maps the response to the common result shape.
         *
         * @param {object} params - `query`, `maxResults`, and optional
         *   `includeDomains` / `excludeDomains` lists applied client-side.
         * @returns {Promise<object>} `{ provider, query, results }`.
         * @throws {Error} When the HTTP response is not ok; the message carries
         *   the status and the first 200 characters of the body.
         */
        async search(params) {
            // A missing or non-numeric maxResults used to be serialized as
            // count=NaN. Fall back to one full page instead; any value that
            // coerces to a finite number behaves exactly as before.
            const requested = Number(params.maxResults);
            const limit = Number.isFinite(requested) ? requested : maxCount;
            const url = new URL(`${baseUrl}/web/search`);
            url.searchParams.set("q", params.query);
            // Ask for twice the limit (capped) because the domain filters
            // below may discard results before the final slice.
            url.searchParams.set("count", String(Math.min(maxCount, Math.max(1, limit * 2))));
            const r = await fetchImpl(url.toString(), {
                method: "GET",
                headers: {
                    accept: "application/json",
                    "x-subscription-token": options.apiKey,
                },
            });
            if (!r.ok) {
                // Best effort: an unreadable body must not mask the HTTP status.
                const text = await r.text().catch(() => "");
                throw new Error(`brave search failed: http ${r.status} ${text.slice(0, 200)}`);
            }
            const data = (await r.json());
            const items = (data.web?.results ?? []).map((it) => ({
                title: it.title ?? "",
                url: it.url ?? "",
                snippet: it.description,
                publishedDate: it.age,
            }));
            let filtered = items;
            if (params.includeDomains?.length) {
                const list = params.includeDomains;
                filtered = filtered.filter((it) => it.url && list.some((d) => hostMatches(it.url, d)));
            }
            if (params.excludeDomains?.length) {
                const list = params.excludeDomains;
                filtered = filtered.filter((it) => !(it.url && list.some((d) => hostMatches(it.url, d))));
            }
            filtered = filtered.slice(0, limit);
            return {
                provider: "brave",
                query: params.query,
                results: filtered,
            };
        },
    };
}
73
+ //# sourceMappingURL=brave.js.map