@blinkdotnew/cli 0.2.0 → 0.2.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +18 -1
- package/dist/cli.js +61 -0
- package/package.json +1 -1
- package/src/cli.ts +2 -0
- package/src/commands/web.ts +74 -0
package/README.md
CHANGED
|
@@ -36,6 +36,10 @@ blink db query "SELECT count(*) FROM users"
|
|
|
36
36
|
# Generate AI content
|
|
37
37
|
blink ai image "a glowing blink logo on dark background"
|
|
38
38
|
blink ai text "Summarize this article: ..."
|
|
39
|
+
|
|
40
|
+
# Scrape websites
|
|
41
|
+
blink scrape https://lovable.dev --extract "pricing tiers and costs"
|
|
42
|
+
blink scrape https://example.com --text
|
|
39
43
|
```
|
|
40
44
|
|
|
41
45
|
---
|
|
@@ -218,7 +222,7 @@ blink ai transcribe https://example.com/audio.mp3 --language en
|
|
|
218
222
|
|
|
219
223
|
---
|
|
220
224
|
|
|
221
|
-
### `blink fetch`
|
|
225
|
+
### `blink fetch`, `blink search`, `blink scrape` — Web & data
|
|
222
226
|
|
|
223
227
|
```bash
|
|
224
228
|
# Fetch any URL via Blink proxy (handles CORS, auth headers)
|
|
@@ -229,8 +233,21 @@ blink fetch https://api.example.com --header "X-API-Key: secret"
|
|
|
229
233
|
# Web search
|
|
230
234
|
blink search "latest AI news"
|
|
231
235
|
blink search "React Server Components" --count 10 --json
|
|
236
|
+
|
|
237
|
+
# Scrape web pages
|
|
238
|
+
blink scrape https://example.com # Raw response
|
|
239
|
+
blink scrape https://example.com --text # Clean text (strips HTML)
|
|
240
|
+
blink scrape https://example.com --extract "all prices" # AI-extract specific data
|
|
241
|
+
blink scrape https://news.ycombinator.com --extract "top 10 story titles and URLs"
|
|
242
|
+
blink scrape https://example.com --extract "contact email" --json
|
|
232
243
|
```
|
|
233
244
|
|
|
245
|
+
No project key needed — `blink scrape` uses your workspace key only.
|
|
246
|
+
|
|
247
|
+
- `--text` — strips all HTML tags, returns readable text
|
|
248
|
+
- `--extract <instructions>` — uses AI (Gemini Flash) to extract exactly what you ask for
|
|
249
|
+
- Combine with `--json` for `{ url, content }` or `{ url, extracted }` output
|
|
250
|
+
|
|
234
251
|
---
|
|
235
252
|
|
|
236
253
|
### `blink realtime` — Pub/sub
|
package/dist/cli.js
CHANGED
|
@@ -509,6 +509,65 @@ Examples:
|
|
|
509
509
|
if (typeof result === "string") console.log(result);
|
|
510
510
|
else console.log(JSON.stringify(result, null, 2));
|
|
511
511
|
});
|
|
512
|
+
program2.command("scrape <url>").description("Scrape a webpage \u2014 returns text content, optionally AI-extracted data").option("--extract <instructions>", 'What to extract using AI (e.g. "all prices and product names")').option("--text", "Strip HTML tags and return clean readable text").addHelpText("after", `
|
|
513
|
+
No project key needed \u2014 scrape runs through the Blink proxy using your workspace key.
|
|
514
|
+
|
|
515
|
+
Examples:
|
|
516
|
+
$ blink scrape https://example.com Get full page content
|
|
517
|
+
$ blink scrape https://example.com --text Clean text only (no HTML)
|
|
518
|
+
$ blink scrape https://example.com --extract "all prices" AI-extract specific data
|
|
519
|
+
$ blink scrape https://news.ycombinator.com --extract "top 10 story titles and URLs" --json
|
|
520
|
+
`).action(async (url, opts) => {
|
|
521
|
+
requireToken();
|
|
522
|
+
const response = await withSpinner(
|
|
523
|
+
"Fetching page...",
|
|
524
|
+
() => resourcesRequest("/api/v1/fetch", { body: { url } })
|
|
525
|
+
);
|
|
526
|
+
let content;
|
|
527
|
+
if (typeof response === "string") {
|
|
528
|
+
content = response;
|
|
529
|
+
} else if (typeof response?.body === "string") {
|
|
530
|
+
content = response.body;
|
|
531
|
+
} else {
|
|
532
|
+
content = JSON.stringify(response);
|
|
533
|
+
}
|
|
534
|
+
if (opts.extract) {
|
|
535
|
+
const extraction = await withSpinner(
|
|
536
|
+
`Extracting: ${opts.extract}...`,
|
|
537
|
+
() => resourcesRequest("/api/v1/ai/chat/completions", {
|
|
538
|
+
body: {
|
|
539
|
+
model: "google/gemini-3-flash",
|
|
540
|
+
messages: [
|
|
541
|
+
{
|
|
542
|
+
role: "system",
|
|
543
|
+
content: "You are a web scraping assistant. Extract exactly the requested information from the webpage content provided. Return only the extracted data, formatted clearly. No commentary."
|
|
544
|
+
},
|
|
545
|
+
{
|
|
546
|
+
role: "user",
|
|
547
|
+
content: `URL: ${url}
|
|
548
|
+
|
|
549
|
+
Webpage content:
|
|
550
|
+
${content.slice(0, 5e4)}
|
|
551
|
+
|
|
552
|
+
Extract: ${opts.extract}`
|
|
553
|
+
}
|
|
554
|
+
],
|
|
555
|
+
stream: false
|
|
556
|
+
}
|
|
557
|
+
})
|
|
558
|
+
);
|
|
559
|
+
const extracted = extraction?.choices?.[0]?.message?.content ?? "";
|
|
560
|
+
if (isJsonMode()) return printJson({ url, extracted, extract_instructions: opts.extract });
|
|
561
|
+
console.log(extracted);
|
|
562
|
+
} else if (opts.text) {
|
|
563
|
+
const text = content.replace(/<script\b[^<]*(?:(?!<\/script>)<[^<]*)*<\/script>/gi, " ").replace(/<style\b[^<]*(?:(?!<\/style>)<[^<]*)*<\/style>/gi, " ").replace(/<[^>]+>/g, " ").replace(/&nbsp;/g, " ").replace(/&amp;/g, "&").replace(/&lt;/g, "<").replace(/&gt;/g, ">").replace(/&quot;/g, '"').replace(/\s+/g, " ").trim();
|
|
564
|
+
if (isJsonMode()) return printJson({ url, text });
|
|
565
|
+
console.log(text);
|
|
566
|
+
} else {
|
|
567
|
+
if (isJsonMode()) return printJson({ url, content });
|
|
568
|
+
console.log(content);
|
|
569
|
+
}
|
|
570
|
+
});
|
|
512
571
|
program2.command("search <query>").description("Web search \u2014 returns titles, URLs, and snippets").option("--count <n>", "Number of results to return (max 20)", "5").addHelpText("after", `
|
|
513
572
|
Examples:
|
|
514
573
|
$ blink search "latest AI news"
|
|
@@ -1539,6 +1598,8 @@ Web & Data:
|
|
|
1539
1598
|
$ blink search "latest AI news" --count 10
|
|
1540
1599
|
$ blink fetch https://api.github.com/users/octocat
|
|
1541
1600
|
$ blink fetch https://api.example.com --method POST --body '{"key":"val"}'
|
|
1601
|
+
$ blink scrape https://example.com --text Clean text (no HTML)
|
|
1602
|
+
$ blink scrape https://example.com --extract "prices" AI-extract specific data
|
|
1542
1603
|
|
|
1543
1604
|
Realtime / RAG / Notify:
|
|
1544
1605
|
$ blink realtime publish updates '{"type":"refresh"}'
|
package/package.json
CHANGED
package/src/cli.ts
CHANGED
|
@@ -70,6 +70,8 @@ Web & Data:
|
|
|
70
70
|
$ blink search "latest AI news" --count 10
|
|
71
71
|
$ blink fetch https://api.github.com/users/octocat
|
|
72
72
|
$ blink fetch https://api.example.com --method POST --body '{"key":"val"}'
|
|
73
|
+
$ blink scrape https://example.com --text Clean text (no HTML)
|
|
74
|
+
$ blink scrape https://example.com --extract "prices" AI-extract specific data
|
|
73
75
|
|
|
74
76
|
Realtime / RAG / Notify:
|
|
75
77
|
$ blink realtime publish updates '{"type":"refresh"}'
|
package/src/commands/web.ts
CHANGED
|
@@ -40,6 +40,80 @@ Examples:
|
|
|
40
40
|
else console.log(JSON.stringify(result, null, 2))
|
|
41
41
|
})
|
|
42
42
|
|
|
43
|
+
program.command('scrape <url>')
|
|
44
|
+
.description('Scrape a webpage — returns text content, optionally AI-extracted data')
|
|
45
|
+
.option('--extract <instructions>', 'What to extract using AI (e.g. "all prices and product names")')
|
|
46
|
+
.option('--text', 'Strip HTML tags and return clean readable text')
|
|
47
|
+
.addHelpText('after', `
|
|
48
|
+
No project key needed — scrape runs through the Blink proxy using your workspace key.
|
|
49
|
+
|
|
50
|
+
Examples:
|
|
51
|
+
$ blink scrape https://example.com Get full page content
|
|
52
|
+
$ blink scrape https://example.com --text Clean text only (no HTML)
|
|
53
|
+
$ blink scrape https://example.com --extract "all prices" AI-extract specific data
|
|
54
|
+
$ blink scrape https://news.ycombinator.com --extract "top 10 story titles and URLs" --json
|
|
55
|
+
`)
|
|
56
|
+
.action(async (url, opts) => {
|
|
57
|
+
requireToken()
|
|
58
|
+
|
|
59
|
+
const response = await withSpinner('Fetching page...', () =>
|
|
60
|
+
resourcesRequest('/api/v1/fetch', { body: { url } })
|
|
61
|
+
)
|
|
62
|
+
|
|
63
|
+
// Extract the response body — fetch returns { status, contentType, body } or raw string
|
|
64
|
+
let content: string
|
|
65
|
+
if (typeof response === 'string') {
|
|
66
|
+
content = response
|
|
67
|
+
} else if (typeof response?.body === 'string') {
|
|
68
|
+
content = response.body
|
|
69
|
+
} else {
|
|
70
|
+
content = JSON.stringify(response)
|
|
71
|
+
}
|
|
72
|
+
|
|
73
|
+
if (opts.extract) {
|
|
74
|
+
// AI extraction — uses workspace key, no project needed
|
|
75
|
+
const extraction = await withSpinner(`Extracting: ${opts.extract}...`, () =>
|
|
76
|
+
resourcesRequest('/api/v1/ai/chat/completions', {
|
|
77
|
+
body: {
|
|
78
|
+
model: 'google/gemini-3-flash',
|
|
79
|
+
messages: [
|
|
80
|
+
{
|
|
81
|
+
role: 'system',
|
|
82
|
+
content: 'You are a web scraping assistant. Extract exactly the requested information from the webpage content provided. Return only the extracted data, formatted clearly. No commentary.',
|
|
83
|
+
},
|
|
84
|
+
{
|
|
85
|
+
role: 'user',
|
|
86
|
+
content: `URL: ${url}\n\nWebpage content:\n${content.slice(0, 50000)}\n\nExtract: ${opts.extract}`,
|
|
87
|
+
},
|
|
88
|
+
],
|
|
89
|
+
stream: false,
|
|
90
|
+
},
|
|
91
|
+
})
|
|
92
|
+
)
|
|
93
|
+
const extracted = extraction?.choices?.[0]?.message?.content ?? ''
|
|
94
|
+
if (isJsonMode()) return printJson({ url, extracted, extract_instructions: opts.extract })
|
|
95
|
+
console.log(extracted)
|
|
96
|
+
} else if (opts.text) {
|
|
97
|
+
// Strip HTML and return clean text
|
|
98
|
+
const text = content
|
|
99
|
+
.replace(/<script\b[^<]*(?:(?!<\/script>)<[^<]*)*<\/script>/gi, ' ')
|
|
100
|
+
.replace(/<style\b[^<]*(?:(?!<\/style>)<[^<]*)*<\/style>/gi, ' ')
|
|
101
|
+
.replace(/<[^>]+>/g, ' ')
|
|
102
|
+
.replace(/&nbsp;/g, ' ')
|
|
103
|
+
.replace(/&amp;/g, '&')
|
|
104
|
+
.replace(/&lt;/g, '<')
|
|
105
|
+
.replace(/&gt;/g, '>')
|
|
106
|
+
.replace(/&quot;/g, '"')
|
|
107
|
+
.replace(/\s+/g, ' ')
|
|
108
|
+
.trim()
|
|
109
|
+
if (isJsonMode()) return printJson({ url, text })
|
|
110
|
+
console.log(text)
|
|
111
|
+
} else {
|
|
112
|
+
if (isJsonMode()) return printJson({ url, content })
|
|
113
|
+
console.log(content)
|
|
114
|
+
}
|
|
115
|
+
})
|
|
116
|
+
|
|
43
117
|
program.command('search <query>')
|
|
44
118
|
.description('Web search — returns titles, URLs, and snippets')
|
|
45
119
|
.option('--count <n>', 'Number of results to return (max 20)', '5')
|