openclaw-workflowskill 0.4.0 → 0.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +38 -11
- package/index.ts +78 -0
- package/lib/adapters.ts +10 -3
- package/lib/openclaw-context.md +41 -0
- package/package.json +6 -3
- package/tools/fetch_raw.ts +89 -0
- package/tools/scrape.ts +69 -0
package/README.md
CHANGED
|
@@ -42,7 +42,7 @@ openclaw plugins install openclaw-workflowskill
|
|
|
42
42
|
|
|
43
43
|
```bash
|
|
44
44
|
openclaw config set plugins.allow '["openclaw-workflowskill"]'
|
|
45
|
-
openclaw config set tools.alsoAllow '["openclaw-workflowskill"]'
|
|
45
|
+
openclaw config set tools.alsoAllow '["openclaw-workflowskill", "web_fetch"]'
|
|
46
46
|
```
|
|
47
47
|
|
|
48
48
|
The first command allowlists the plugin so the gateway loads it. The second makes its tools available in every session — including cron jobs — so agents can invoke `workflowskill_run` to execute workflows autonomously.
|
|
@@ -59,7 +59,7 @@ The gateway loads plugins and config at startup. A restart is required to pick u
|
|
|
59
59
|
|
|
60
60
|
```bash
|
|
61
61
|
openclaw plugins list
|
|
62
|
-
# → workflowskill:
|
|
62
|
+
# → workflowskill: 6 tools registered
|
|
63
63
|
|
|
64
64
|
openclaw skills list
|
|
65
65
|
# → workflowskill-author (user-invocable)
|
|
@@ -105,14 +105,16 @@ Your agent will use `"model": "haiku"` for cron jobs, ensuring execution is chea
|
|
|
105
105
|
|
|
106
106
|
## Tools
|
|
107
107
|
|
|
108
|
-
Registers
|
|
108
|
+
Registers six tools with the OpenClaw agent:
|
|
109
109
|
|
|
110
|
-
| Tool
|
|
111
|
-
|
|
|
112
|
-
| `workflowskill_validate`
|
|
113
|
-
| `workflowskill_run`
|
|
114
|
-
| `workflowskill_runs`
|
|
115
|
-
| `workflowskill_llm`
|
|
110
|
+
| Tool | Description |
|
|
111
|
+
| -------------------------- | ------------------------------------------------------------------ |
|
|
112
|
+
| `workflowskill_validate` | Parse and validate a SKILL.md or raw YAML workflow |
|
|
113
|
+
| `workflowskill_run` | Execute a workflow and return a compact run summary |
|
|
114
|
+
| `workflowskill_runs` | List and inspect past run logs |
|
|
115
|
+
| `workflowskill_llm` | Call Anthropic directly for inline LLM reasoning in workflows |
|
|
116
|
+
| `workflowskill_fetch_raw` | HTTP fetch returning `{ status, headers, body }` with JSON parsed |
|
|
117
|
+
| `workflowskill_scrape` | Fetch a web page and extract text via CSS selectors |
|
|
116
118
|
|
|
117
119
|
Also ships the `/workflowskill-author` skill — just say "I want to automate X" and the agent handles the rest: researching, writing, validating, and test-running the workflow in chat.
|
|
118
120
|
|
|
@@ -135,9 +137,9 @@ Review past runs via `workflowskill_runs`.
|
|
|
135
137
|
|
|
136
138
|
Workflow `tool` steps are forwarded to the **OpenClaw gateway** via `POST /tools/invoke`. Any tool registered with the gateway is available to a workflow — the plugin sends the tool name and args as JSON and returns the result. Gateway auth (`config.gateway.auth.token`) must be configured or the plugin will refuse to start.
|
|
137
139
|
|
|
138
|
-
The `workflowskill_llm`
|
|
140
|
+
The `workflowskill_llm`, `workflowskill_fetch_raw`, and `workflowskill_scrape` tools are built-in: they call external services directly without going through the gateway adapter.
|
|
139
141
|
|
|
140
|
-
Only `workflowskill_run` is blocked from gateway forwarding — it is self-referencing and would create infinite recursion. The other
|
|
142
|
+
Only `workflowskill_run` is blocked from gateway forwarding — it is self-referencing and would create infinite recursion. The other plugin tools (`workflowskill_validate`, `workflowskill_runs`, `workflowskill_llm`, `workflowskill_fetch_raw`, `workflowskill_scrape`) are leaf operations and are forwarded normally.
|
|
141
143
|
|
|
142
144
|
## Tool Reference
|
|
143
145
|
|
|
@@ -186,6 +188,31 @@ Call Anthropic directly and return the text response. Uses the API key from Open
|
|
|
186
188
|
|
|
187
189
|
Returns the LLM response as a plain text string.
|
|
188
190
|
|
|
191
|
+
### `workflowskill_fetch_raw`
|
|
192
|
+
|
|
193
|
+
Make an HTTP request and return the raw response with JSON auto-parsed. Use this instead of `web_fetch` when you need structured data from a JSON API — `web_fetch` converts responses to markdown, destroying JSON structure.
|
|
194
|
+
|
|
195
|
+
| Param | Type | Required | Description |
|
|
196
|
+
| --------- | ------ | -------- | -------------------------------------------------------------- |
|
|
197
|
+
| `url` | string | yes | The URL to fetch (http or https) |
|
|
198
|
+
| `method` | string | no | HTTP method — GET, POST, PUT, PATCH, DELETE, HEAD, OPTIONS |
|
|
199
|
+
| `headers` | object | no | Request headers as key-value pairs |
|
|
200
|
+
| `body` | string | no | Request body string (e.g. `JSON.stringify(...)` for POST/PUT) |
|
|
201
|
+
|
|
202
|
+
Returns `{ status, headers, body }`. `body` is a parsed object for `application/json` responses, or a raw string otherwise. Network errors return `{ status: 0, headers: {}, body: "<error message>" }` so workflows can branch on failure.
|
|
203
|
+
|
|
204
|
+
### `workflowskill_scrape`
|
|
205
|
+
|
|
206
|
+
Fetch a web page and extract structured data using CSS selectors.
|
|
207
|
+
|
|
208
|
+
| Param | Type | Required | Description |
|
|
209
|
+
| ----------- | ------ | -------- | -------------------------------------------------------------------- |
|
|
210
|
+
| `url` | string | yes | The page URL to fetch (http or https) |
|
|
211
|
+
| `selectors` | object | yes | Named CSS selectors, e.g. `{ "title": "h1", "prices": "span.price" }` |
|
|
212
|
+
| `headers` | object | no | Custom request headers as key-value pairs |
|
|
213
|
+
|
|
214
|
+
Returns `{ status, results }` where `results` maps each selector name to an array of matching text values. Errors return `{ status: 0, error: "<message>" }`.
|
|
215
|
+
|
|
189
216
|
## Development
|
|
190
217
|
|
|
191
218
|
The plugin imports from `workflowskill`, installed from npm. No build step is required for type checking:
|
package/index.ts
CHANGED
|
@@ -10,6 +10,8 @@ import { validateHandler } from './tools/validate.js';
|
|
|
10
10
|
import { runHandler } from './tools/run.js';
|
|
11
11
|
import { runsHandler } from './tools/runs.js';
|
|
12
12
|
import { llmHandler } from './tools/llm.js';
|
|
13
|
+
import { fetchRawHandler } from './tools/fetch_raw.js';
|
|
14
|
+
import { scrapeHandler } from './tools/scrape.js';
|
|
13
15
|
import { createToolAdapter, type GatewayConfig } from './lib/adapters.js';
|
|
14
16
|
|
|
15
17
|
// ─── OpenClaw plugin API types ─────────────────────────────────────────────
|
|
@@ -231,5 +233,81 @@ export default {
|
|
|
231
233
|
},
|
|
232
234
|
});
|
|
233
235
|
|
|
236
|
+
// ── workflowskill_scrape ──────────────────────────────────────────────
|
|
237
|
+
registerTool({
|
|
238
|
+
name: 'workflowskill_scrape',
|
|
239
|
+
description:
|
|
240
|
+
'Fetch a web page and extract structured data using CSS selectors. ' +
|
|
241
|
+
'Returns { status, results } where results maps each selector name to an array of matching text values. ' +
|
|
242
|
+
'Use when you need to extract specific content from HTML pages — ' +
|
|
243
|
+
'supply named selectors like { "title": "h1", "prices": "span.price" }. ' +
|
|
244
|
+
'Errors return { status: 0, error: "<message>" }.',
|
|
245
|
+
parameters: {
|
|
246
|
+
type: 'object',
|
|
247
|
+
properties: {
|
|
248
|
+
url: {
|
|
249
|
+
type: 'string',
|
|
250
|
+
description: 'The page URL to fetch (http or https).',
|
|
251
|
+
},
|
|
252
|
+
selectors: {
|
|
253
|
+
type: 'object',
|
|
254
|
+
description: 'Map of named CSS selectors, e.g. { "title": "h1", "prices": "span.price" }.',
|
|
255
|
+
},
|
|
256
|
+
headers: {
|
|
257
|
+
type: 'object',
|
|
258
|
+
description: 'Custom request headers as key-value pairs. Optional.',
|
|
259
|
+
},
|
|
260
|
+
},
|
|
261
|
+
required: ['url', 'selectors'],
|
|
262
|
+
},
|
|
263
|
+
execute: async (_id, params) => {
|
|
264
|
+
return toContent(
|
|
265
|
+
await scrapeHandler(
|
|
266
|
+
params as { url: string; selectors: Record<string, string>; headers?: Record<string, string> },
|
|
267
|
+
),
|
|
268
|
+
);
|
|
269
|
+
},
|
|
270
|
+
});
|
|
271
|
+
|
|
272
|
+
// ── workflowskill_fetch_raw ───────────────────────────────────────────
|
|
273
|
+
registerTool({
|
|
274
|
+
name: 'workflowskill_fetch_raw',
|
|
275
|
+
description:
|
|
276
|
+
'Make an HTTP request and return { status, headers, body } with JSON auto-parsed. ' +
|
|
277
|
+
'Use this instead of web_fetch when you need structured JSON from an API — ' +
|
|
278
|
+
'web_fetch converts responses to markdown, destroying JSON structure. ' +
|
|
279
|
+
'body is a parsed object for application/json responses, or a raw string otherwise. ' +
|
|
280
|
+
'Network errors return { status: 0, body: "<error message>" } so workflows can branch on failure.',
|
|
281
|
+
parameters: {
|
|
282
|
+
type: 'object',
|
|
283
|
+
properties: {
|
|
284
|
+
url: {
|
|
285
|
+
type: 'string',
|
|
286
|
+
description: 'The URL to fetch (http or https).',
|
|
287
|
+
},
|
|
288
|
+
method: {
|
|
289
|
+
type: 'string',
|
|
290
|
+
description: 'HTTP method (GET, POST, PUT, PATCH, DELETE, HEAD, OPTIONS). Defaults to GET.',
|
|
291
|
+
},
|
|
292
|
+
headers: {
|
|
293
|
+
type: 'object',
|
|
294
|
+
description: 'Request headers as key-value pairs. Optional.',
|
|
295
|
+
},
|
|
296
|
+
body: {
|
|
297
|
+
type: 'string',
|
|
298
|
+
description: 'Request body as a string (e.g. JSON.stringify output). Optional.',
|
|
299
|
+
},
|
|
300
|
+
},
|
|
301
|
+
required: ['url'],
|
|
302
|
+
},
|
|
303
|
+
execute: async (_id, params) => {
|
|
304
|
+
return toContent(
|
|
305
|
+
await fetchRawHandler(
|
|
306
|
+
params as { url: string; method?: string; headers?: Record<string, string>; body?: string },
|
|
307
|
+
),
|
|
308
|
+
);
|
|
309
|
+
},
|
|
310
|
+
});
|
|
311
|
+
|
|
234
312
|
},
|
|
235
313
|
};
|
package/lib/adapters.ts
CHANGED
|
@@ -6,15 +6,22 @@ import type { ToolAdapter, ToolDescriptor, ToolResult } from 'workflowskill';
|
|
|
6
6
|
|
|
7
7
|
/** Unwrap MCP text-content envelope so workflows see actual tool output. */
|
|
8
8
|
function unwrapMcpContent(body: unknown): unknown {
|
|
9
|
-
|
|
10
|
-
|
|
9
|
+
// Peel outer { ok, result } gateway envelope if present.
|
|
10
|
+
let inner: unknown = body;
|
|
11
|
+
if (inner !== null && typeof inner === 'object' && 'ok' in inner && 'result' in inner) {
|
|
12
|
+
inner = (inner as { ok: unknown; result: unknown }).result;
|
|
13
|
+
}
|
|
14
|
+
|
|
15
|
+
// Peel MCP { content: [{ type: 'text', text: '...' }] } envelope.
|
|
16
|
+
if (inner !== null && typeof inner === 'object' && 'content' in inner) {
|
|
17
|
+
const { content } = inner as { content: unknown };
|
|
11
18
|
if (Array.isArray(content) && content.length === 1
|
|
12
19
|
&& content[0]?.type === 'text' && typeof content[0]?.text === 'string') {
|
|
13
20
|
try { return JSON.parse(content[0].text); }
|
|
14
21
|
catch { return content[0].text; }
|
|
15
22
|
}
|
|
16
23
|
}
|
|
17
|
-
return
|
|
24
|
+
return inner;
|
|
18
25
|
}
|
|
19
26
|
|
|
20
27
|
export interface GatewayConfig {
|
package/lib/openclaw-context.md
CHANGED
|
@@ -34,3 +34,44 @@ Always set `"model": "haiku"` on cron payloads — cron runs are lightweight orc
|
|
|
34
34
|
> ```
|
|
35
35
|
>
|
|
36
36
|
> Without this, cron sessions cannot invoke `workflowskill_run` and will fail silently.
|
|
37
|
+
|
|
38
|
+
### Fetching Raw API Data
|
|
39
|
+
|
|
40
|
+
Use `workflowskill_fetch_raw` when a workflow step needs structured data from an HTTP API. Unlike `web_fetch`, which converts responses to markdown (destroying JSON structure), `workflowskill_fetch_raw` returns a parsed object for `application/json` responses.
|
|
41
|
+
|
|
42
|
+
**Return shape:**
|
|
43
|
+
```json
|
|
44
|
+
{ "status": 200, "headers": { "content-type": "application/json" }, "body": { ... } }
|
|
45
|
+
```
|
|
46
|
+
|
|
47
|
+
Access response data via `$result.body.<field>`. Network errors return `status: 0` and a string `body` describing the error, so workflows can branch on failure.
|
|
48
|
+
|
|
49
|
+
**GET request (JSON API):**
|
|
50
|
+
```yaml
|
|
51
|
+
steps:
|
|
52
|
+
- id: fetch_jobs
|
|
53
|
+
type: tool
|
|
54
|
+
tool: workflowskill_fetch_raw
|
|
55
|
+
params:
|
|
56
|
+
url: "https://boards-api.greenhouse.io/v1/boards/intrinsic/jobs"
|
|
57
|
+
- id: count_jobs
|
|
58
|
+
type: tool
|
|
59
|
+
tool: workflowskill_llm
|
|
60
|
+
params:
|
|
61
|
+
prompt: "There are {{ steps.fetch_jobs.result.body.jobs.length }} jobs."
|
|
62
|
+
```
|
|
63
|
+
|
|
64
|
+
**POST request with JSON body:**
|
|
65
|
+
```yaml
|
|
66
|
+
steps:
|
|
67
|
+
- id: create_item
|
|
68
|
+
type: tool
|
|
69
|
+
tool: workflowskill_fetch_raw
|
|
70
|
+
params:
|
|
71
|
+
url: "https://api.example.com/items"
|
|
72
|
+
method: POST
|
|
73
|
+
headers:
|
|
74
|
+
Content-Type: application/json
|
|
75
|
+
Authorization: "Bearer {{ inputs.token }}"
|
|
76
|
+
body: '{"name": "{{ inputs.name }}"}'
|
|
77
|
+
```
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "openclaw-workflowskill",
|
|
3
|
-
"version": "0.
|
|
3
|
+
"version": "0.5.0",
|
|
4
4
|
"description": "WorkflowSkill plugin for OpenClaw — author, validate, run, and review YAML workflows",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"main": "index.ts",
|
|
@@ -35,7 +35,8 @@
|
|
|
35
35
|
]
|
|
36
36
|
},
|
|
37
37
|
"dependencies": {
|
|
38
|
-
"
|
|
38
|
+
"cheerio": "^1.0.0",
|
|
39
|
+
"workflowskill": "^0.5.0"
|
|
39
40
|
},
|
|
40
41
|
"devDependencies": {
|
|
41
42
|
"@types/node": "^25.3.0",
|
|
@@ -44,6 +45,8 @@
|
|
|
44
45
|
},
|
|
45
46
|
"scripts": {
|
|
46
47
|
"typecheck": "tsc --noEmit --project tsconfig.json",
|
|
47
|
-
"prepublishOnly": "tsc --noEmit --project tsconfig.json"
|
|
48
|
+
"prepublishOnly": "tsc --noEmit --project tsconfig.json",
|
|
49
|
+
"dev:link": "./scripts/dev-link.sh",
|
|
50
|
+
"dev:unlink": "./scripts/dev-unlink.sh"
|
|
48
51
|
}
|
|
49
52
|
}
|
|
package/tools/fetch_raw.ts
ADDED
|
@@ -0,0 +1,89 @@
|
|
|
1
|
+
// workflowskill_fetch_raw — HTTP fetch that preserves JSON structure.
|
|
2
|
+
|
|
3
|
+
const ALLOWED_METHODS = new Set(['GET', 'POST', 'PUT', 'PATCH', 'DELETE', 'HEAD', 'OPTIONS']);
|
|
4
|
+
const TIMEOUT_MS = 30_000;
|
|
5
|
+
const MAX_BYTES = 10 * 1024 * 1024; // 10 MB
|
|
6
|
+
|
|
7
|
+
export interface FetchRawParams {
|
|
8
|
+
url: string;
|
|
9
|
+
method?: string;
|
|
10
|
+
headers?: Record<string, string>;
|
|
11
|
+
body?: string;
|
|
12
|
+
}
|
|
13
|
+
|
|
14
|
+
export interface FetchRawResult {
|
|
15
|
+
status: number;
|
|
16
|
+
headers: Record<string, string>;
|
|
17
|
+
body: unknown;
|
|
18
|
+
}
|
|
19
|
+
|
|
20
|
+
export async function fetchRawHandler(params: FetchRawParams): Promise<FetchRawResult> {
|
|
21
|
+
const { url, method = 'GET', headers = {}, body } = params;
|
|
22
|
+
|
|
23
|
+
// Protocol validation
|
|
24
|
+
let parsed: URL;
|
|
25
|
+
try {
|
|
26
|
+
parsed = new URL(url);
|
|
27
|
+
} catch {
|
|
28
|
+
return { status: 0, headers: {}, body: `Invalid URL: ${url}` };
|
|
29
|
+
}
|
|
30
|
+
if (parsed.protocol !== 'http:' && parsed.protocol !== 'https:') {
|
|
31
|
+
return { status: 0, headers: {}, body: `Unsupported protocol: ${parsed.protocol}` };
|
|
32
|
+
}
|
|
33
|
+
|
|
34
|
+
// Method validation
|
|
35
|
+
const upperMethod = method.toUpperCase();
|
|
36
|
+
if (!ALLOWED_METHODS.has(upperMethod)) {
|
|
37
|
+
return { status: 0, headers: {}, body: `Unsupported method: ${method}` };
|
|
38
|
+
}
|
|
39
|
+
|
|
40
|
+
let response: Response;
|
|
41
|
+
try {
|
|
42
|
+
response = await fetch(url, {
|
|
43
|
+
method: upperMethod,
|
|
44
|
+
headers,
|
|
45
|
+
body: body !== undefined ? body : undefined,
|
|
46
|
+
signal: AbortSignal.timeout(TIMEOUT_MS),
|
|
47
|
+
});
|
|
48
|
+
} catch (err) {
|
|
49
|
+
return { status: 0, headers: {}, body: err instanceof Error ? err.message : String(err) };
|
|
50
|
+
}
|
|
51
|
+
|
|
52
|
+
// Collect response headers
|
|
53
|
+
const responseHeaders: Record<string, string> = {};
|
|
54
|
+
response.headers.forEach((value, key) => {
|
|
55
|
+
responseHeaders[key] = value;
|
|
56
|
+
});
|
|
57
|
+
|
|
58
|
+
// Read with size guard
|
|
59
|
+
let rawBody: string;
|
|
60
|
+
try {
|
|
61
|
+
const buffer = await response.arrayBuffer();
|
|
62
|
+
if (buffer.byteLength > MAX_BYTES) {
|
|
63
|
+
return {
|
|
64
|
+
status: response.status,
|
|
65
|
+
headers: responseHeaders,
|
|
66
|
+
body: `Response too large: ${buffer.byteLength} bytes (max ${MAX_BYTES})`,
|
|
67
|
+
};
|
|
68
|
+
}
|
|
69
|
+
rawBody = new TextDecoder().decode(buffer);
|
|
70
|
+
} catch (err) {
|
|
71
|
+
return {
|
|
72
|
+
status: response.status,
|
|
73
|
+
headers: responseHeaders,
|
|
74
|
+
body: err instanceof Error ? err.message : String(err),
|
|
75
|
+
};
|
|
76
|
+
}
|
|
77
|
+
|
|
78
|
+
// Auto-parse JSON
|
|
79
|
+
const contentType = response.headers.get('content-type') ?? '';
|
|
80
|
+
if (contentType.includes('application/json')) {
|
|
81
|
+
try {
|
|
82
|
+
return { status: response.status, headers: responseHeaders, body: JSON.parse(rawBody) as unknown };
|
|
83
|
+
} catch {
|
|
84
|
+
// Fall through to raw string if JSON parsing fails
|
|
85
|
+
}
|
|
86
|
+
}
|
|
87
|
+
|
|
88
|
+
return { status: response.status, headers: responseHeaders, body: rawBody };
|
|
89
|
+
}
|
package/tools/scrape.ts
ADDED
|
@@ -0,0 +1,69 @@
|
|
|
1
|
+
// workflowskill_scrape — Fetch a web page and extract data via CSS selectors.
|
|
2
|
+
|
|
3
|
+
import { load } from 'cheerio';
|
|
4
|
+
|
|
5
|
+
const TIMEOUT_MS = 30_000;
|
|
6
|
+
const MAX_BYTES = 10 * 1024 * 1024; // 10 MB
|
|
7
|
+
|
|
8
|
+
export interface ScrapeParams {
|
|
9
|
+
url: string;
|
|
10
|
+
selectors: Record<string, string>;
|
|
11
|
+
headers?: Record<string, string>;
|
|
12
|
+
}
|
|
13
|
+
|
|
14
|
+
export interface ScrapeResult {
|
|
15
|
+
status: number;
|
|
16
|
+
results?: Record<string, string[]>;
|
|
17
|
+
error?: string;
|
|
18
|
+
}
|
|
19
|
+
|
|
20
|
+
export async function scrapeHandler(params: ScrapeParams): Promise<ScrapeResult> {
|
|
21
|
+
const { url, selectors, headers = {} } = params;
|
|
22
|
+
|
|
23
|
+
// Protocol validation
|
|
24
|
+
let parsed: URL;
|
|
25
|
+
try {
|
|
26
|
+
parsed = new URL(url);
|
|
27
|
+
} catch {
|
|
28
|
+
return { status: 0, error: `Invalid URL: ${url}` };
|
|
29
|
+
}
|
|
30
|
+
if (parsed.protocol !== 'http:' && parsed.protocol !== 'https:') {
|
|
31
|
+
return { status: 0, error: `Unsupported protocol: ${parsed.protocol}` };
|
|
32
|
+
}
|
|
33
|
+
|
|
34
|
+
let response: Response;
|
|
35
|
+
try {
|
|
36
|
+
response = await fetch(url, {
|
|
37
|
+
headers,
|
|
38
|
+
signal: AbortSignal.timeout(TIMEOUT_MS),
|
|
39
|
+
});
|
|
40
|
+
} catch (err) {
|
|
41
|
+
return { status: 0, error: err instanceof Error ? err.message : String(err) };
|
|
42
|
+
}
|
|
43
|
+
|
|
44
|
+
// Read with size guard
|
|
45
|
+
let html: string;
|
|
46
|
+
try {
|
|
47
|
+
const buffer = await response.arrayBuffer();
|
|
48
|
+
if (buffer.byteLength > MAX_BYTES) {
|
|
49
|
+
return { status: response.status, error: `Response too large: ${buffer.byteLength} bytes (max ${MAX_BYTES})` };
|
|
50
|
+
}
|
|
51
|
+
html = new TextDecoder().decode(buffer);
|
|
52
|
+
} catch (err) {
|
|
53
|
+
return { status: response.status, error: err instanceof Error ? err.message : String(err) };
|
|
54
|
+
}
|
|
55
|
+
|
|
56
|
+
const $ = load(html);
|
|
57
|
+
const results: Record<string, string[]> = {};
|
|
58
|
+
|
|
59
|
+
for (const [key, selector] of Object.entries(selectors)) {
|
|
60
|
+
const texts: string[] = [];
|
|
61
|
+
$(selector).each((_i, el) => {
|
|
62
|
+
const text = $(el).text().trim();
|
|
63
|
+
if (text.length > 0) texts.push(text);
|
|
64
|
+
});
|
|
65
|
+
results[key] = texts;
|
|
66
|
+
}
|
|
67
|
+
|
|
68
|
+
return { status: response.status, results };
|
|
69
|
+
}
|