edgecrawl 0.3.1 → 0.3.3
This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
- package/README.md +35 -9
- package/package.json +1 -1
package/README.md
CHANGED
|
@@ -166,9 +166,17 @@ const result = await scrapeAndExtract("https://example.com");
|
|
|
166
166
|
// Custom schema
|
|
167
167
|
const product = await scrapeAndExtract("https://shop.example.com/item", {
|
|
168
168
|
schema: {
|
|
169
|
-
|
|
170
|
-
|
|
171
|
-
|
|
169
|
+
type: "object",
|
|
170
|
+
properties: {
|
|
171
|
+
name: { type: "string", description: "Product name" },
|
|
172
|
+
price: { type: "number", description: "Price (numeric)" },
|
|
173
|
+
features: {
|
|
174
|
+
type: "array",
|
|
175
|
+
items: { type: "string" },
|
|
176
|
+
description: "Key features or specs",
|
|
177
|
+
},
|
|
178
|
+
},
|
|
179
|
+
required: ["name", "price"],
|
|
172
180
|
},
|
|
173
181
|
});
|
|
174
182
|
|
|
@@ -213,12 +221,20 @@ Define what data to extract by providing a JSON schema file:
|
|
|
213
221
|
|
|
214
222
|
```json
|
|
215
223
|
{
|
|
216
|
-
"
|
|
217
|
-
"
|
|
218
|
-
|
|
219
|
-
|
|
220
|
-
|
|
221
|
-
|
|
224
|
+
"type": "object",
|
|
225
|
+
"properties": {
|
|
226
|
+
"name": { "type": "string", "description": "Product name" },
|
|
227
|
+
"price": { "type": "number", "description": "Price (numeric)" },
|
|
228
|
+
"currency": { "type": "string", "description": "Currency code (e.g. USD, EUR, JPY)" },
|
|
229
|
+
"description": { "type": "string", "description": "Product description (1-3 sentences)" },
|
|
230
|
+
"features": {
|
|
231
|
+
"type": "array",
|
|
232
|
+
"items": { "type": "string" },
|
|
233
|
+
"description": "Key features or specs"
|
|
234
|
+
},
|
|
235
|
+
"availability": { "type": "string", "description": "Stock status (in stock, out of stock, etc.)" }
|
|
236
|
+
},
|
|
237
|
+
"required": ["name", "price", "currency"]
|
|
222
238
|
}
|
|
223
239
|
```
|
|
224
240
|
|
|
@@ -247,6 +263,16 @@ All models run locally via ONNX Runtime. First run downloads the model to `.mode
|
|
|
247
263
|
| LLM | Transformers.js v4 + Qwen3 ONNX | Local structured extraction |
|
|
248
264
|
| CLI | Commander.js | Command-line interface |
|
|
249
265
|
|
|
266
|
+
## AI Agent Skill
|
|
267
|
+
|
|
268
|
+
A skill file is included for AI coding agents. Install it to let your agent use edgecrawl directly:
|
|
269
|
+
|
|
270
|
+
```bash
|
|
271
|
+
npx skills add couzip/edgecrawl
|
|
272
|
+
```
|
|
273
|
+
|
|
274
|
+
Once installed, your AI agent can scrape websites and extract structured data using edgecrawl.
|
|
275
|
+
|
|
250
276
|
## Requirements
|
|
251
277
|
|
|
252
278
|
- Node.js >= 20.0.0
|
package/package.json
CHANGED