@phi-code-admin/phi-code 0.75.6 → 0.76.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,6 @@
1
1
  # Phi Code Extensions
2
2
 
3
- 10 TypeScript extensions automatically loaded at startup.
3
+ 11 TypeScript extensions automatically loaded at startup.
4
4
 
5
5
  ## Extensions
6
6
 
@@ -17,6 +17,31 @@
17
17
  | **Setup** | `setup.ts` | — | `/setup` | — |
18
18
  | **Keys** | `keys.ts` | — | `/keys` | `session_start` (hot-reload watcher) |
19
19
  | **Models** | `models.ts` | — | `/models` | `session_start` (background catalog refresh) |
20
+ | **Browser** | `browser.ts` | `browser_navigate`, `browser_extract`, `browser_screenshot`, `browser_search`, `browser_click`, `browser_type`, `browser_scroll`, `browser_snapshot`, `browser_close_tab`, `browser_list_tabs` | — | `session_shutdown` (kill Firefox) |
21
+
22
+ ## Bundled browser engine (Camoufox)
23
+
24
+ The `browser.ts` extension exposes ten high-level tools backed by a
25
+ vendored snapshot of [Camoufox](https://github.com/daijro/camoufox) v135.0.1-beta.24
26
+ (anti-detect Firefox fork, MPL-2.0). It bypasses Cloudflare and most
27
+ bot-detection that plain `fetch` + cheerio can't get past.
28
+
29
+ - Phi-code ships **its own copy** of the JS launcher and the OpenClaw
30
+ automation server (`@phi-code-admin/camoufox-js`,
31
+ `@phi-code-admin/camofox-browser`, `@phi-code-admin/browser`) so no
32
+ third-party-maintained npm package sits on the critical path.
33
+ - The Firefox binary itself is re-hosted on
34
+ [uglyswap/phi-code releases](https://github.com/uglyswap/phi-code/releases/tag/binaries-v1.0.0)
35
+ and downloaded once by the camoufox-js postinstall, cached under
36
+ `~/.cache/phi-code/camoufox/v1.0.0/<platform>-<arch>/` (XDG / Library /
37
+ LOCALAPPDATA respected). No runtime call to daijro/camoufox.
38
+ - `PHI_BROWSER_DISABLED=1` turns the extension off without uninstalling.
39
+ - `CAMOUFOX_BIN_DIR=/absolute/path` overrides the cache (air-gapped CI).
40
+ - `CAMOUFOX_SKIP_DOWNLOAD=1` skips the postinstall download; useful when
41
+ you want to ship pre-baked Docker images.
42
+
43
+ The web-search cascade (`web_search`, `/search`) is unchanged — Camoufox
44
+ is an additional capability, not a replacement.
20
45
 
21
46
  ## Live Model Catalogs
22
47
 
@@ -0,0 +1,268 @@
1
+ /**
2
+ * Browser Extension for Phi Code
3
+ *
4
+ * Registers 10 browser tools backed by the bundled Camoufox stack
5
+ * (`@phi-code-admin/browser`):
6
+ *
7
+ * browser_navigate — open/follow a URL
8
+ * browser_extract — readability extraction (works on SPAs)
9
+ * browser_screenshot — PNG capture, base64 in the tool result
10
+ * browser_search — Google/DuckDuckGo/Bing search macro
11
+ * browser_click — click by accessibility ref or CSS selector
12
+ * browser_type — type text into focused/targeted element
13
+ * browser_scroll — page/element scroll
14
+ * browser_snapshot — accessibility tree with refs for follow-up tools
15
+ * browser_close_tab — release a single tab
16
+ * browser_list_tabs — list open tabs for the current session
17
+ *
18
+ * Lifecycle:
19
+ * - Lazy boot: the Camoufox server starts on the first tool call.
20
+ * - `session_shutdown`: best-effort `closeAll()` to avoid zombie Firefox.
21
+ * - PHI_BROWSER_DISABLED=1 disables the whole extension at startup (the
22
+ * user keeps the legacy `web_search` / `fetch_url` only).
23
+ */
24
+
25
+ import { Type } from "@sinclair/typebox";
26
+ import type { ExtensionAPI } from "phi-code";
27
+
28
+ // PHI-VENDOR: dynamic import so phi-code keeps starting even when the
29
+ // vendored browser stack isn't installed (e.g. binaries unavailable for
30
+ // the host's `process.platform`-`process.arch` combo). We surface a
31
+ // concrete error on first tool call instead of refusing to boot.
32
/** Module-namespace type of the vendored browser package. */
type BrowserApi = typeof import("@phi-code-admin/browser");

// Memoised outcome of the import attempt. Once either slot is set the
// import is never retried. `cachedApi` is also consulted by the
// `session_shutdown` hook to decide whether there is anything to close.
let cachedApi: BrowserApi | undefined;
let importError: Error | undefined;

/**
 * Lazily loads `@phi-code-admin/browser` and memoises the result.
 *
 * The first successful import is stored in `cachedApi` and returned by every
 * later call. The first failure is stored in `importError` and re-thrown by
 * every later call without attempting the import again.
 *
 * @returns The imported module namespace.
 * @throws The `Error` raised by the dynamic import; a non-`Error` rejection
 *   is wrapped as `new Error(String(reason))`.
 */
async function getBrowserApi(): Promise<BrowserApi> {
  if (cachedApi !== undefined) {
    return cachedApi;
  }
  if (importError !== undefined) {
    throw importError;
  }

  let loaded: BrowserApi;
  try {
    loaded = (await import("@phi-code-admin/browser")) as BrowserApi;
  } catch (reason) {
    // Normalise whatever was thrown so callers always receive an Error.
    importError = reason instanceof Error ? reason : new Error(String(reason));
    throw importError;
  }

  cachedApi = loaded;
  return loaded;
}
48
+
49
/**
 * Reports whether the browser extension was switched off through the
 * `PHI_BROWSER_DISABLED` environment variable.
 *
 * The values `1`, `true` and `yes` disable the extension. Matching ignores
 * letter case and surrounding whitespace, so `TRUE` or `Yes` behave the same
 * as their lowercase forms. Any other value, or an unset variable, leaves the
 * extension enabled.
 *
 * @returns `true` when the variable holds one of the accepted values.
 */
function isDisabled(): boolean {
  // Normalise once so the comparison below stays a plain literal match.
  const flag = process.env.PHI_BROWSER_DISABLED?.trim().toLowerCase();
  return flag === "1" || flag === "true" || flag === "yes";
}
53
+
54
/**
 * Renders a tool result as text.
 *
 * Strings are returned unchanged. Every other value is serialised with
 * `JSON.stringify` using a two-space indent.
 *
 * `JSON.stringify` returns `undefined` rather than a string for `undefined`,
 * functions and symbols; in that case the value's `String(...)` form is used
 * so the function always honours its `string` return type.
 *
 * @param value Value to render.
 * @returns The text representation of `value`.
 * @throws Whatever `JSON.stringify` throws (e.g. on circular structures).
 */
function jsonResult(value: unknown): string {
  if (typeof value === "string") {
    return value;
  }
  // The lib typing claims `string`, but the runtime result can be undefined.
  const serialised: string | undefined = JSON.stringify(value, null, 2);
  return serialised ?? String(value);
}
57
+
58
/**
 * Extension entry point: registers the ten `browser_*` tools and a
 * `session_shutdown` hook on the supplied extension API.
 *
 * Nothing is registered when `isDisabled()` reports an opt-out. Each tool
 * resolves the browser package through `getBrowserApi()` at call time,
 * forwards its parameters unchanged to the matching API function and returns
 * the outcome as a single text content item rendered by `jsonResult`.
 *
 * NOTE(review): every `execute` callback treats its first argument as the
 * tool parameters — confirm this matches the host's `registerTool` contract.
 *
 * @param pi Extension API handle supplied by the host.
 */
export default function browserExtension(pi: ExtensionAPI) {
  // The user opted out, so register nothing and stay silent.
  if (isDisabled()) return;

  // Wraps an API result in the single-text-item shape every tool returns.
  const asText = (payload: unknown) => ({
    content: [{ type: "text" as const, text: jsonResult(payload) }],
  });

  // ─── browser_navigate ─────────────────────────────────────────────
  pi.registerTool({
    name: "browser_navigate",
    description:
      "Open a URL in a Camoufox tab. If `tabId` is omitted, a new tab is created. Returns the tab id, the final URL and the HTTP status when available.",
    parameters: Type.Object({
      url: Type.String({ description: "Full URL (https://...)" }),
      tabId: Type.Optional(Type.String()),
      waitUntil: Type.Optional(
        Type.Union([
          Type.Literal("load"),
          Type.Literal("domcontentloaded"),
          Type.Literal("networkidle"),
        ]),
      ),
      timeoutMs: Type.Optional(Type.Number()),
    }),
    execute: async (params) =>
      asText(await (await getBrowserApi()).navigate(params)),
  });

  // ─── browser_extract ──────────────────────────────────────────────
  pi.registerTool({
    name: "browser_extract",
    description:
      "Return the readable content of a page (Mozilla Readability under the hood). Works on SPA / JS-heavy sites. Pass either `tabId` (existing tab) or `url` (opens a fresh tab).",
    parameters: Type.Object({
      tabId: Type.Optional(Type.String()),
      url: Type.Optional(Type.String()),
      mode: Type.Optional(
        Type.Union([
          Type.Literal("readability"),
          Type.Literal("html"),
          Type.Literal("text"),
        ]),
      ),
    }),
    execute: async (params) =>
      asText(await (await getBrowserApi()).extract(params)),
  });

  // ─── browser_screenshot ───────────────────────────────────────────
  pi.registerTool({
    name: "browser_screenshot",
    description:
      "Capture a screenshot of the current tab as a PNG. The image bytes are returned base64-encoded under `bytesBase64`.",
    parameters: Type.Object({
      tabId: Type.String(),
      fullPage: Type.Optional(Type.Boolean()),
    }),
    execute: async (params) =>
      asText(await (await getBrowserApi()).screenshot(params)),
  });

  // ─── browser_search ───────────────────────────────────────────────
  pi.registerTool({
    name: "browser_search",
    description:
      "Run a web search through the Camoufox browser (anti-detect Firefox) and return the readability extraction of the results page. Useful when scraping Google directly is rate-limited.",
    parameters: Type.Object({
      query: Type.String(),
      engine: Type.Optional(
        Type.Union([
          Type.Literal("google"),
          Type.Literal("duckduckgo"),
          Type.Literal("bing"),
        ]),
      ),
    }),
    execute: async (params) =>
      asText(await (await getBrowserApi()).search(params)),
  });

  // ─── browser_click ────────────────────────────────────────────────
  pi.registerTool({
    name: "browser_click",
    description:
      "Click an element. Pass either `ref` (returned by browser_snapshot) or `selector` (CSS).",
    parameters: Type.Object({
      tabId: Type.String(),
      ref: Type.Optional(Type.String()),
      selector: Type.Optional(Type.String()),
      button: Type.Optional(
        Type.Union([
          Type.Literal("left"),
          Type.Literal("right"),
          Type.Literal("middle"),
        ]),
      ),
    }),
    execute: async (params) =>
      asText(await (await getBrowserApi()).click(params)),
  });

  // ─── browser_type ─────────────────────────────────────────────────
  pi.registerTool({
    name: "browser_type",
    description:
      "Type text into an element. Pass `ref` or `selector` to target a specific input; otherwise types into the currently focused element. Set `pressEnter: true` to submit a form.",
    parameters: Type.Object({
      tabId: Type.String(),
      text: Type.String(),
      ref: Type.Optional(Type.String()),
      selector: Type.Optional(Type.String()),
      pressEnter: Type.Optional(Type.Boolean()),
      delayMs: Type.Optional(Type.Number()),
    }),
    execute: async (params) =>
      asText(await (await getBrowserApi()).type(params)),
  });

  // ─── browser_scroll ───────────────────────────────────────────────
  pi.registerTool({
    name: "browser_scroll",
    description:
      "Scroll the page (or a specific element by `ref`) by `pixels` in the given direction.",
    parameters: Type.Object({
      tabId: Type.String(),
      direction: Type.Union([
        Type.Literal("up"),
        Type.Literal("down"),
        Type.Literal("left"),
        Type.Literal("right"),
      ]),
      ref: Type.Optional(Type.String()),
      pixels: Type.Optional(Type.Number()),
    }),
    execute: async (params) =>
      asText(await (await getBrowserApi()).scroll(params)),
  });

  // ─── browser_snapshot ─────────────────────────────────────────────
  pi.registerTool({
    name: "browser_snapshot",
    description:
      "Return the accessibility tree for the current tab. Each node carries a `ref` that can be passed back to browser_click / browser_type / browser_scroll. Cheaper than parsing HTML.",
    parameters: Type.Object({
      tabId: Type.String(),
    }),
    execute: async (params) =>
      asText(await (await getBrowserApi()).snapshot(params)),
  });

  // ─── browser_close_tab ────────────────────────────────────────────
  pi.registerTool({
    name: "browser_close_tab",
    description: "Close a single tab. The browser process stays warm.",
    parameters: Type.Object({
      tabId: Type.String(),
    }),
    execute: async (params) =>
      asText(await (await getBrowserApi()).closeTab(params)),
  });

  // ─── browser_list_tabs ────────────────────────────────────────────
  pi.registerTool({
    name: "browser_list_tabs",
    description: "List open tabs for the current user.",
    parameters: Type.Object({
      userId: Type.Optional(Type.String()),
    }),
    execute: async (params) =>
      asText(await (await getBrowserApi()).listTabs(params)),
  });

  // ─── Lifecycle: close everything when the session shuts down ──────
  pi.on("session_shutdown", async () => {
    // Only act if the package was actually loaded; never import it here.
    if (cachedApi) {
      try {
        await cachedApi.closeAll();
      } catch {
        // Best-effort: a failed close must not disturb shutdown.
      }
    }
  });
}
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@phi-code-admin/phi-code",
3
- "version": "0.75.6",
3
+ "version": "0.76.0",
4
4
  "description": "Coding agent CLI with persistent memory, sub-agents, intelligent routing, and orchestration",
5
5
  "type": "module",
6
6
  "piConfig": {
@@ -47,6 +47,7 @@
47
47
  },
48
48
  "dependencies": {
49
49
  "@mariozechner/jiti": "^2.6.5",
50
+ "@phi-code-admin/browser": "^1.0.0",
50
51
  "@silvia-odwyer/photon-node": "^0.3.4",
51
52
  "chalk": "^5.5.0",
52
53
  "cli-highlight": "^2.1.11",