pi-firecrawl 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +68 -0
- package/extensions/firecrawl.ts +307 -0
- package/package.json +17 -0
package/README.md
ADDED
|
@@ -0,0 +1,68 @@
|
|
|
1
|
+
# pi-firecrawl
|
|
2
|
+
|
|
3
|
+
Firecrawl web scraping extension for [Pi coding agent](https://pi.dev).
|
|
4
|
+
|
|
5
|
+
Provides 5 tools and a `/firecrawl` command for Firecrawl API v2 integration.
|
|
6
|
+
|
|
7
|
+
## Install
|
|
8
|
+
|
|
9
|
+
```bash
|
|
10
|
+
pi install npm:pi-firecrawl
|
|
11
|
+
```
|
|
12
|
+
|
|
13
|
+
## Tools
|
|
14
|
+
|
|
15
|
+
| Tool | Description |
|
|
16
|
+
|------|-------------|
|
|
17
|
+
| `firecrawl_scrape` | Scrape a single URL into markdown, HTML, raw HTML, links, screenshots, or JSON |
|
|
18
|
+
| `firecrawl_crawl` | Start a crawl job to discover and scrape multiple pages |
|
|
19
|
+
| `firecrawl_crawl_status` | Check the status and retrieve data of a crawl job |
|
|
20
|
+
| `firecrawl_map` | Discover all URLs on a website using the map endpoint |
|
|
21
|
+
| `firecrawl_search` | Search the web and return results |
|
|
22
|
+
|
|
23
|
+
## Commands
|
|
24
|
+
|
|
25
|
+
- `/firecrawl` — Select which Firecrawl tools to enable/disable
|
|
26
|
+
|
|
27
|
+
## Environment Variables
|
|
28
|
+
|
|
29
|
+
| Variable | Default | Description |
|
|
30
|
+
|----------|---------|-------------|
|
|
31
|
+
| `FIRECRAWL_API_KEY` | — | Firecrawl API token (fallback if not set via `/firecrawl key`) |
|
|
32
|
+
| `FIRECRAWL_API_URL` | `https://api.firecrawl.dev` | Base URL for Firecrawl API |
|
|
33
|
+
|
|
34
|
+
## API Key Management
|
|
35
|
+
|
|
36
|
+
You can set the Firecrawl API key from within pi (persisted across sessions):
|
|
37
|
+
|
|
38
|
+
```
|
|
39
|
+
/firecrawl key fc-your-api-key-here
|
|
40
|
+
```
|
|
41
|
+
|
|
42
|
+
Check or clear the key:
|
|
43
|
+
|
|
44
|
+
```
|
|
45
|
+
/firecrawl key # shows current key status
|
|
46
|
+
/firecrawl key --clear # removes saved key
|
|
47
|
+
```
|
|
48
|
+
|
|
49
|
+
**Key resolution order:**
|
|
50
|
+
1. Saved key (via `/firecrawl key`)
|
|
51
|
+
2. `FIRECRAWL_API_KEY` env var
|
|
52
|
+
3. `token` parameter passed per-tool (used only when neither of the above is set)
|
|
53
|
+
|
|
54
|
+
## Usage
|
|
55
|
+
|
|
56
|
+
```bash
|
|
57
|
+
# Scrape a URL
|
|
58
|
+
firecrawl_scrape url="https://example.com"
|
|
59
|
+
|
|
60
|
+
# Search the web
|
|
61
|
+
firecrawl_search query="pi coding agent"
|
|
62
|
+
|
|
63
|
+
# Start a crawl
|
|
64
|
+
firecrawl_crawl url="https://example.com"
|
|
65
|
+
|
|
66
|
+
# Check crawl status
|
|
67
|
+
firecrawl_crawl_status jobId="<job-id>"
|
|
68
|
+
```
|
|
@@ -0,0 +1,307 @@
|
|
|
1
|
+
import type { ExtensionAPI } from "@earendil-works/pi-coding-agent";
|
|
2
|
+
import { Type } from "typebox";
|
|
3
|
+
import * as fs from "node:fs";
|
|
4
|
+
import * as path from "node:path";
|
|
5
|
+
|
|
6
|
+
/**
 * Normalise the Firecrawl base URL taken from the environment.
 *
 * An unset, empty, or whitespace-only value falls back to the public API host,
 * and trailing slashes are removed so that `${base}${endpoint}` (endpoints
 * start with "/") never produces a double slash.
 *
 * @param raw Raw value of the FIRECRAWL_API_URL environment variable.
 * @returns Base URL without a trailing slash.
 */
function normalizeBaseUrl(raw: string | undefined): string {
  const trimmed = raw?.trim();
  return (trimmed ? trimmed : "https://api.firecrawl.dev").replace(/\/+$/, "");
}

// Base API URL, can be overridden via FIRECRAWL_API_URL env var
const FIRECRAWL_BASE_URL = normalizeBaseUrl(process.env.FIRECRAWL_API_URL);
// Custom entry type under which the enabled-tool selection is stored in the session.
const STATE_KEY = "firecrawl-settings";
// Location of the auth.json file that holds the saved API key.
// NOTE(review): the "~" fallback is not expanded by path.join, so when neither
// HOME nor USERPROFILE is set this resolves relative to the working directory;
// os.homedir() would be more robust but needs an extra import.
const AUTH_FILE = path.join(process.env.HOME || process.env.USERPROFILE || "~", ".pi", "agent", "auth.json");
|
|
10
|
+
|
|
11
|
+
/**
 * Read the saved Firecrawl API key from the auth file.
 *
 * The key is only returned when the "firecrawl" entry has type "api_key" and
 * a string `key`. Any read or parse problem is treated as "no key saved".
 *
 * @returns The stored key, or undefined when none is available.
 */
function loadKey(): string | undefined {
  try {
    const stored = JSON.parse(fs.readFileSync(AUTH_FILE, "utf-8"))["firecrawl"];
    const usable = Boolean(stored) && stored.type === "api_key" && typeof stored.key === "string";
    return usable ? stored.key : undefined;
  } catch {
    // Missing file, unreadable file, or malformed JSON: behave as if no key exists.
    return undefined;
  }
}
|
|
25
|
+
|
|
26
|
+
/**
 * Persist the Firecrawl API key as the "firecrawl" entry of the auth file,
 * leaving every other entry in the file untouched.
 *
 * The file holds secrets, so it is created with owner-only permissions (the
 * mode only applies when the file is created). A missing or empty file is
 * treated as an empty object. Content that is not a JSON object (or is
 * malformed) is left alone rather than overwritten.
 *
 * @param key API key to store.
 * @returns true when the key was written, false when nothing was saved.
 */
function saveKey(key: string): boolean {
  try {
    const raw = fs.existsSync(AUTH_FILE) ? fs.readFileSync(AUTH_FILE, "utf-8") : "";
    const data: unknown = raw.trim() === "" ? {} : JSON.parse(raw);
    // Setting a property on null throws and on an array is dropped by
    // JSON.stringify, so refuse anything that is not a plain object.
    if (typeof data !== "object" || data === null || Array.isArray(data)) return false;

    const updated = { ...data, firecrawl: { type: "api_key", key } };
    // recursive mkdir is a no-op when the directory already exists.
    fs.mkdirSync(path.dirname(AUTH_FILE), { recursive: true });
    fs.writeFileSync(AUTH_FILE, JSON.stringify(updated, null, 2), { encoding: "utf-8", mode: 0o600 });
    return true;
  } catch {
    // Best effort: file-system and parse errors are reported through the return value.
    return false;
  }
}
|
|
36
|
+
|
|
37
|
+
/**
 * Remove the "firecrawl" entry from the auth file, keeping all other entries.
 *
 * The file is only rewritten when there is actually an entry to remove, so a
 * no-op clear never touches the credentials file.
 *
 * @returns true when no Firecrawl entry remains in the file afterwards
 *          (including when there was nothing to remove), false when the file
 *          could not be read, parsed, or written.
 */
function clearKey(): boolean {
  try {
    if (!fs.existsSync(AUTH_FILE)) return true;

    const data: unknown = JSON.parse(fs.readFileSync(AUTH_FILE, "utf-8"));
    // Non-object content (null, array, scalar) cannot hold a "firecrawl" entry.
    if (typeof data !== "object" || data === null || Array.isArray(data)) return true;
    if (!("firecrawl" in data)) return true;

    const remaining: Record<string, unknown> = { ...data };
    delete remaining["firecrawl"];
    fs.writeFileSync(AUTH_FILE, JSON.stringify(remaining, null, 2), { encoding: "utf-8", mode: 0o600 });
    return true;
  } catch {
    // Best effort: file-system and parse errors are reported through the return value.
    return false;
  }
}
|
|
46
|
+
|
|
47
|
+
/**
 * Resolve the API token to use for a request.
 *
 * Priority: key saved in auth.json > FIRECRAWL_API_KEY env var > per-call
 * parameter. Blank candidates (empty or whitespace-only, e.g. an exported but
 * empty env var) are skipped so they cannot shadow a valid lower-priority
 * token, and the chosen token is trimmed before use.
 *
 * @param paramToken Token passed explicitly by the tool caller, if any.
 * @returns The first non-blank token, or undefined when none is configured.
 */
function resolveToken(paramToken?: string): string | undefined {
  const candidates = [loadKey(), process.env.FIRECRAWL_API_KEY, paramToken];
  for (const candidate of candidates) {
    const token = candidate?.trim();
    if (token) return token;
  }
  return undefined;
}
|
|
51
|
+
|
|
52
|
+
/**
 * POST a JSON body to a Firecrawl endpoint and return the parsed JSON response.
 *
 * @param endpoint Path appended to the base URL (e.g. "/v2/scrape").
 * @param token Bearer token sent in the Authorization header.
 * @param body Request payload; serialised with JSON.stringify.
 * @param signal Optional abort signal forwarded to fetch.
 * @returns The parsed JSON response body.
 * @throws Error carrying the HTTP status and response text on a non-2xx reply.
 */
async function firecrawlRequest(
  endpoint: string,
  token: string,
  body: unknown,
  signal?: AbortSignal
) {
  const target = `${FIRECRAWL_BASE_URL}${endpoint}`;
  const payload = JSON.stringify(body);

  const res = await fetch(target, {
    method: "POST",
    headers: {
      Authorization: `Bearer ${token}`,
      "Content-Type": "application/json",
    },
    body: payload,
    signal,
  });

  if (res.ok) {
    return res.json();
  }

  // Include the response text so the caller sees the API's own error message.
  const detail = await res.text();
  throw new Error(`Firecrawl API error ${res.status}: ${detail}`);
}
|
|
75
|
+
|
|
76
|
+
/**
 * GET a Firecrawl endpoint and return the parsed JSON response.
 *
 * @param endpoint Path appended to the base URL (e.g. "/v2/crawl/<id>").
 * @param token Bearer token sent in the Authorization header.
 * @param signal Optional abort signal forwarded to fetch.
 * @returns The parsed JSON response body.
 * @throws Error carrying the HTTP status and response text on a non-2xx reply.
 */
async function firecrawlGet(
  endpoint: string,
  token: string,
  signal?: AbortSignal
) {
  const target = `${FIRECRAWL_BASE_URL}${endpoint}`;

  const res = await fetch(target, {
    headers: {
      Authorization: `Bearer ${token}`,
    },
    signal,
  });

  if (res.ok) {
    return res.json();
  }

  // Include the response text so the caller sees the API's own error message.
  const detail = await res.text();
  throw new Error(`Firecrawl API error ${res.status}: ${detail}`);
}
|
|
95
|
+
|
|
96
|
+
/**
 * Extension entry point: registers the five Firecrawl tools, the `/firecrawl`
 * command (tool selection and API-key management), and a status indicator that
 * is shown while a Firecrawl tool is executing.
 *
 * @param pi Extension API handle supplied by the host.
 */
export default function (pi: ExtensionAPI) {
  // Single source of truth for the tool names registered below.
  const firecrawlToolNames = ["firecrawl_scrape", "firecrawl_crawl", "firecrawl_crawl_status", "firecrawl_map", "firecrawl_search"];
  const missingTokenMessage = "Firecrawl API token not provided. Set one with `/firecrawl key <token>` or set FIRECRAWL_API_KEY env var.";
  // resolveToken() prefers the saved key and the env var over the per-call
  // parameter, so the description must not suggest the parameter overrides them.
  const tokenParamDescription = "Firecrawl API token (used only when no saved key or FIRECRAWL_API_KEY env var is set)";

  let activeFirecrawlTools: string[] = [...firecrawlToolNames];

  /** Keep only known Firecrawl tool names, de-duplicated and in registration order. */
  function sanitizeSelection(values: readonly unknown[]): string[] {
    return firecrawlToolNames.filter((name) => values.includes(name));
  }

  /** Build a text-only tool result; `isError` is only present on failures. */
  function textResult(text: string, isError = false) {
    const content = [{ type: "text" as const, text }];
    return isError ? { content, isError: true } : { content };
  }

  /** Human-readable message for anything that can be thrown. */
  function describeError(err: unknown): string {
    return err instanceof Error ? err.message : String(err);
  }

  /** Schema for the optional per-call token parameter shared by every tool. */
  function tokenParam() {
    return Type.Optional(Type.String({ description: tokenParamDescription }));
  }

  /**
   * Shared tool body: resolve the token, emit the progress update, run the
   * request, and convert the outcome (or any thrown error) into a tool result.
   */
  async function runWithToken(
    paramToken: string | undefined,
    announce: () => void,
    failureLabel: string,
    request: (token: string) => Promise<unknown>
  ) {
    const token = resolveToken(paramToken);
    if (!token) return textResult(missingTokenMessage, true);

    announce();

    try {
      const data = await request(token);
      return textResult(JSON.stringify(data, null, 2));
    } catch (err: unknown) {
      return textResult(`${failureLabel}: ${describeError(err)}`, true);
    }
  }

  // Restore state on session start
  pi.on("session_start", async (_event, ctx) => {
    for (const entry of ctx.sessionManager.getEntries()) {
      // Later entries overwrite earlier ones, so the last saved selection wins.
      if (entry.type === "custom" && entry.customType === STATE_KEY && Array.isArray(entry.data)) {
        // Persisted data is untrusted: drop anything that is not a known tool name.
        activeFirecrawlTools = sanitizeSelection(entry.data);
      }
    }
    updateActiveTools();
  });

  /**
   * Apply the current selection: every non-Firecrawl tool plus the enabled
   * Firecrawl tools.
   *
   * NOTE(review): this starts from getAllTools(), so it appears to re-enable
   * non-Firecrawl tools that were deactivated elsewhere — confirm whether the
   * host exposes the currently active set and use that instead.
   */
  function updateActiveTools() {
    const nonFirecrawlTools = pi
      .getAllTools()
      .filter((t) => !t.name.startsWith("firecrawl_"))
      .map((t) => t.name);

    pi.setActiveTools([...nonFirecrawlTools, ...activeFirecrawlTools]);
  }

  // --- Tools ---

  pi.registerTool({
    name: "firecrawl_scrape",
    label: "Firecrawl Scrape",
    description: "Scrape a single URL into markdown, HTML, raw HTML, links, screenshots, or JSON",
    parameters: Type.Object({
      url: Type.String({ description: "URL to scrape" }),
      formats: Type.Optional(
        Type.Array(Type.String(), { description: "Output formats (markdown, html, rawHtml, links, screenshot, json)" })
      ),
      token: tokenParam(),
    }),
    async execute(_toolCallId, params, signal, onUpdate, _ctx) {
      return runWithToken(
        params.token,
        () => onUpdate?.({ content: [{ type: "text", text: `Scraping ${params.url}...` }] }),
        "Firecrawl scrape failed",
        (token) => firecrawlRequest("/v2/scrape", token, {
          url: params.url,
          formats: params.formats ?? ["markdown"],
        }, signal)
      );
    },
  });

  pi.registerTool({
    name: "firecrawl_crawl",
    label: "Firecrawl Crawl",
    description: "Start a crawl job to discover and scrape multiple pages",
    parameters: Type.Object({
      url: Type.String({ description: "Base URL to start crawl from" }),
      token: tokenParam(),
    }),
    async execute(_toolCallId, params, signal, onUpdate, _ctx) {
      return runWithToken(
        params.token,
        () => onUpdate?.({ content: [{ type: "text", text: `Starting crawl from ${params.url}...` }] }),
        "Firecrawl crawl failed",
        (token) => firecrawlRequest("/v2/crawl", token, { url: params.url }, signal)
      );
    },
  });

  pi.registerTool({
    name: "firecrawl_crawl_status",
    label: "Firecrawl Crawl Status",
    description: "Check the status and retrieve data of a crawl job",
    parameters: Type.Object({
      jobId: Type.String({ description: "The ID of the crawl job" }),
      token: tokenParam(),
    }),
    async execute(_toolCallId, params, signal, onUpdate, _ctx) {
      return runWithToken(
        params.token,
        () => onUpdate?.({ content: [{ type: "text", text: `Checking status of job ${params.jobId}...` }] }),
        "Firecrawl status check failed",
        // Encode the id so characters such as "/", "?" or "#" cannot alter the request path.
        (token) => firecrawlGet(`/v2/crawl/${encodeURIComponent(params.jobId)}`, token, signal)
      );
    },
  });

  pi.registerTool({
    name: "firecrawl_map",
    label: "Firecrawl Map",
    description: "Discover all URLs on a website using the map endpoint",
    parameters: Type.Object({
      url: Type.String({ description: "Base URL to map" }),
      token: tokenParam(),
    }),
    async execute(_toolCallId, params, signal, onUpdate, _ctx) {
      return runWithToken(
        params.token,
        () => onUpdate?.({ content: [{ type: "text", text: `Mapping ${params.url}...` }] }),
        "Firecrawl map failed",
        (token) => firecrawlRequest("/v2/map", token, { url: params.url }, signal)
      );
    },
  });

  pi.registerTool({
    name: "firecrawl_search",
    label: "Firecrawl Search",
    description: "Search the web and return results",
    parameters: Type.Object({
      query: Type.String({ description: "Search query" }),
      limit: Type.Optional(Type.Number({ description: "Number of results to return" })),
      token: tokenParam(),
    }),
    async execute(_toolCallId, params, signal, onUpdate, _ctx) {
      return runWithToken(
        params.token,
        () => onUpdate?.({ content: [{ type: "text", text: `Searching for "${params.query}"...` }] }),
        "Firecrawl search failed",
        (token) => firecrawlRequest("/v2/search", token, {
          query: params.query,
          // `||` is deliberate: a limit of 0 falls back to the default of 5.
          limit: params.limit || 5,
        }, signal)
      );
    },
  });

  // --- Command ---

  pi.registerCommand("firecrawl", {
    description: "Firecrawl configuration. Use `/firecrawl` to select tools, `/firecrawl key <token>` to set API key, `/firecrawl key --clear` to remove it.",
    handler: async (args, ctx) => {
      const parts = args.trim().split(/\s+/);

      // /firecrawl key [<value> | --clear]
      if (parts[0] === "key") {
        const rest = parts.slice(1);

        if (rest[0] === "--clear") {
          clearKey();
          // clearKey() swallows file errors, so confirm the result by re-reading.
          if (loadKey() === undefined) {
            ctx.ui.notify("Firecrawl API key cleared", "info");
          } else {
            ctx.ui.notify(`Failed to clear Firecrawl API key in ${AUTH_FILE}`, "error");
          }
        } else if (rest.length > 0) {
          const key = rest.join(" ");
          saveKey(key);
          // saveKey() swallows file errors, so confirm the result by re-reading.
          if (loadKey() === key) {
            ctx.ui.notify("Firecrawl API key saved", "info");
          } else {
            ctx.ui.notify(`Failed to save Firecrawl API key to ${AUTH_FILE}`, "error");
          }
        } else {
          // No argument: report which key source is in effect (only a short prefix is shown).
          const currentKey = loadKey();
          if (currentKey) {
            ctx.ui.notify(`Firecrawl API key is set (${currentKey.slice(0, 8)}...)`, "info");
          } else if (process.env.FIRECRAWL_API_KEY) {
            ctx.ui.notify(`Using FIRECRAWL_API_KEY env var (${process.env.FIRECRAWL_API_KEY.slice(0, 8)}...)`, "info");
          } else {
            ctx.ui.notify("No Firecrawl API key set. Use `/firecrawl key <token>` to set one.", "info");
          }
        }
        return;
      }

      // /firecrawl (tool selector)
      const choice: unknown = await ctx.ui.select(
        "Select Firecrawl tools to enable",
        [...firecrawlToolNames],
        { multiple: true }
      );

      if (choice) {
        // The selector may hand back a single value instead of an array; spreading
        // a bare string into the active-tool list would yield single characters.
        const picked = Array.isArray(choice) ? choice : [choice];
        activeFirecrawlTools = sanitizeSelection(picked);
        pi.appendEntry(STATE_KEY, activeFirecrawlTools);
        updateActiveTools();
        ctx.ui.notify(`Firecrawl tools updated`, "info");
      }
    },
  });

  // --- Status Indicator ---

  pi.on("tool_execution_start", async (event, ctx) => {
    if (event.toolName?.startsWith("firecrawl_")) {
      ctx.ui.setStatus("firecrawl", "🔥 Firecrawl active...");
    }
  });

  pi.on("tool_execution_end", async (event, ctx) => {
    if (event.toolName?.startsWith("firecrawl_")) {
      ctx.ui.setStatus("firecrawl", "");
    }
  });
}
|
package/package.json
ADDED
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
{
|
|
2
|
+
"name": "pi-firecrawl",
|
|
3
|
+
"version": "0.1.0",
|
|
4
|
+
"description": "Firecrawl web scraping tools for Pi coding agent - scrape, crawl, map, and search with Firecrawl API v2",
|
|
5
|
+
"keywords": ["pi-package", "firecrawl", "web-scraping", "crawl", "pi-extension"],
|
|
6
|
+
"license": "MIT",
|
|
7
|
+
"type": "module",
|
|
8
|
+
"pi": {
|
|
9
|
+
"extensions": ["./extensions"],
|
|
10
|
+
"video": "",
|
|
11
|
+
"image": ""
|
|
12
|
+
},
|
|
13
|
+
"peerDependencies": {
|
|
14
|
+
"@earendil-works/pi-coding-agent": "*",
|
|
15
|
+
"typebox": "*"
|
|
16
|
+
}
|
|
17
|
+
}
|