@blinkdotnew/cli 0.2.0 → 0.2.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -36,6 +36,10 @@ blink db query "SELECT count(*) FROM users"
36
36
  # Generate AI content
37
37
  blink ai image "a glowing blink logo on dark background"
38
38
  blink ai text "Summarize this article: ..."
39
+
40
+ # Scrape websites
41
+ blink scrape https://lovable.dev --extract "pricing tiers and costs"
42
+ blink scrape https://example.com --text
39
43
  ```
40
44
 
41
45
  ---
@@ -218,7 +222,7 @@ blink ai transcribe https://example.com/audio.mp3 --language en
218
222
 
219
223
  ---
220
224
 
221
- ### `blink fetch` and `blink search` — Web & data
225
+ ### `blink fetch`, `blink search`, `blink scrape` — Web & data
222
226
 
223
227
  ```bash
224
228
  # Fetch any URL via Blink proxy (handles CORS, auth headers)
@@ -229,8 +233,21 @@ blink fetch https://api.example.com --header "X-API-Key: secret"
229
233
  # Web search
230
234
  blink search "latest AI news"
231
235
  blink search "React Server Components" --count 10 --json
236
+
237
+ # Scrape web pages
238
+ blink scrape https://example.com # Raw response
239
+ blink scrape https://example.com --text # Clean text (strips HTML)
240
+ blink scrape https://example.com --extract "all prices" # AI-extract specific data
241
+ blink scrape https://news.ycombinator.com --extract "top 10 story titles and URLs"
242
+ blink scrape https://example.com --extract "contact email" --json
232
243
  ```
233
244
 
245
+ No project key needed — `blink scrape` uses your workspace key only.
246
+
247
+ - `--text` — strips all HTML tags, returns readable text
248
+ - `--extract <instructions>` — uses AI (Gemini Flash) to extract exactly what you ask for
249
+ - Combine with `--json` for structured output: `{ url, content }` (default), `{ url, text }` (with `--text`), or `{ url, extracted, extract_instructions }` (with `--extract`)
250
+
234
251
  ---
235
252
 
236
253
  ### `blink realtime` — Pub/sub
package/dist/cli.js CHANGED
@@ -509,6 +509,65 @@ Examples:
509
509
  if (typeof result === "string") console.log(result);
510
510
  else console.log(JSON.stringify(result, null, 2));
511
511
  });
512
+ program2.command("scrape <url>").description("Scrape a webpage \u2014 returns text content, optionally AI-extracted data").option("--extract <instructions>", 'What to extract using AI (e.g. "all prices and product names")').option("--text", "Strip HTML tags and return clean readable text").addHelpText("after", `
513
+ No project key needed \u2014 scrape runs through the Blink proxy using your workspace key.
514
+
515
+ Examples:
516
+ $ blink scrape https://example.com Get full page content
517
+ $ blink scrape https://example.com --text Clean text only (no HTML)
518
+ $ blink scrape https://example.com --extract "all prices" AI-extract specific data
519
+ $ blink scrape https://news.ycombinator.com --extract "top 10 story titles and URLs" --json
520
+ `).action(async (url, opts) => {
521
+ requireToken();
522
+ const response = await withSpinner(
523
+ "Fetching page...",
524
+ () => resourcesRequest("/api/v1/fetch", { body: { url } })
525
+ );
526
+ let content;
527
+ if (typeof response === "string") {
528
+ content = response;
529
+ } else if (typeof response?.body === "string") {
530
+ content = response.body;
531
+ } else {
532
+ content = JSON.stringify(response);
533
+ }
534
+ if (opts.extract) {
535
+ const extraction = await withSpinner(
536
+ `Extracting: ${opts.extract}...`,
537
+ () => resourcesRequest("/api/v1/ai/chat/completions", {
538
+ body: {
539
+ model: "google/gemini-3-flash",
540
+ messages: [
541
+ {
542
+ role: "system",
543
+ content: "You are a web scraping assistant. Extract exactly the requested information from the webpage content provided. Return only the extracted data, formatted clearly. No commentary."
544
+ },
545
+ {
546
+ role: "user",
547
+ content: `URL: ${url}
548
+
549
+ Webpage content:
550
+ ${content.slice(0, 5e4)}
551
+
552
+ Extract: ${opts.extract}`
553
+ }
554
+ ],
555
+ stream: false
556
+ }
557
+ })
558
+ );
559
+ const extracted = extraction?.choices?.[0]?.message?.content ?? "";
560
+ if (isJsonMode()) return printJson({ url, extracted, extract_instructions: opts.extract });
561
+ console.log(extracted);
562
+ } else if (opts.text) {
563
+ const text = content.replace(/<script\b[^<]*(?:(?!<\/script>)<[^<]*)*<\/script>/gi, " ").replace(/<style\b[^<]*(?:(?!<\/style>)<[^<]*)*<\/style>/gi, " ").replace(/<[^>]+>/g, " ").replace(/&nbsp;/g, " ").replace(/&amp;/g, "&").replace(/&lt;/g, "<").replace(/&gt;/g, ">").replace(/&quot;/g, '"').replace(/\s+/g, " ").trim();
564
+ if (isJsonMode()) return printJson({ url, text });
565
+ console.log(text);
566
+ } else {
567
+ if (isJsonMode()) return printJson({ url, content });
568
+ console.log(content);
569
+ }
570
+ });
512
571
  program2.command("search <query>").description("Web search \u2014 returns titles, URLs, and snippets").option("--count <n>", "Number of results to return (max 20)", "5").addHelpText("after", `
513
572
  Examples:
514
573
  $ blink search "latest AI news"
@@ -1539,6 +1598,8 @@ Web & Data:
1539
1598
  $ blink search "latest AI news" --count 10
1540
1599
  $ blink fetch https://api.github.com/users/octocat
1541
1600
  $ blink fetch https://api.example.com --method POST --body '{"key":"val"}'
1601
+ $ blink scrape https://example.com --text Clean text (no HTML)
1602
+ $ blink scrape https://example.com --extract "prices" AI-extract specific data
1542
1603
 
1543
1604
  Realtime / RAG / Notify:
1544
1605
  $ blink realtime publish updates '{"type":"refresh"}'
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@blinkdotnew/cli",
3
- "version": "0.2.0",
3
+ "version": "0.2.2",
4
4
  "description": "Blink platform CLI — deploy apps, manage databases, generate AI content",
5
5
  "bin": {
6
6
  "blink": "dist/cli.js"
package/src/cli.ts CHANGED
@@ -70,6 +70,8 @@ Web & Data:
70
70
  $ blink search "latest AI news" --count 10
71
71
  $ blink fetch https://api.github.com/users/octocat
72
72
  $ blink fetch https://api.example.com --method POST --body '{"key":"val"}'
73
+ $ blink scrape https://example.com --text Clean text (no HTML)
74
+ $ blink scrape https://example.com --extract "prices" AI-extract specific data
73
75
 
74
76
  Realtime / RAG / Notify:
75
77
  $ blink realtime publish updates '{"type":"refresh"}'
@@ -40,6 +40,80 @@ Examples:
40
40
  else console.log(JSON.stringify(result, null, 2))
41
41
  })
42
42
 
43
+ program.command('scrape <url>')
44
+ .description('Scrape a webpage — returns text content, optionally AI-extracted data')
45
+ .option('--extract <instructions>', 'What to extract using AI (e.g. "all prices and product names")')
46
+ .option('--text', 'Strip HTML tags and return clean readable text')
47
+ .addHelpText('after', `
48
+ No project key needed — scrape runs through the Blink proxy using your workspace key.
49
+
50
+ Examples:
51
+ $ blink scrape https://example.com Get full page content
52
+ $ blink scrape https://example.com --text Clean text only (no HTML)
53
+ $ blink scrape https://example.com --extract "all prices" AI-extract specific data
54
+ $ blink scrape https://news.ycombinator.com --extract "top 10 story titles and URLs" --json
55
+ `)
56
+ .action(async (url, opts) => {
57
+ requireToken()
58
+
59
+ const response = await withSpinner('Fetching page...', () =>
60
+ resourcesRequest('/api/v1/fetch', { body: { url } })
61
+ )
62
+
63
+ // Extract the response body — fetch returns { status, contentType, body } or raw string
64
+ let content: string
65
+ if (typeof response === 'string') {
66
+ content = response
67
+ } else if (typeof response?.body === 'string') {
68
+ content = response.body
69
+ } else {
70
+ content = JSON.stringify(response)
71
+ }
72
+
73
+ if (opts.extract) {
74
+ // AI extraction — uses workspace key, no project needed
75
+ const extraction = await withSpinner(`Extracting: ${opts.extract}...`, () =>
76
+ resourcesRequest('/api/v1/ai/chat/completions', {
77
+ body: {
78
+ model: 'google/gemini-3-flash',
79
+ messages: [
80
+ {
81
+ role: 'system',
82
+ content: 'You are a web scraping assistant. Extract exactly the requested information from the webpage content provided. Return only the extracted data, formatted clearly. No commentary.',
83
+ },
84
+ {
85
+ role: 'user',
86
+ content: `URL: ${url}\n\nWebpage content:\n${content.slice(0, 50000)}\n\nExtract: ${opts.extract}`,
87
+ },
88
+ ],
89
+ stream: false,
90
+ },
91
+ })
92
+ )
93
+ const extracted = extraction?.choices?.[0]?.message?.content ?? ''
94
+ if (isJsonMode()) return printJson({ url, extracted, extract_instructions: opts.extract })
95
+ console.log(extracted)
96
+ } else if (opts.text) {
97
+ // Strip HTML and return clean text
98
+ const text = content
99
+ .replace(/<script\b[^<]*(?:(?!<\/script>)<[^<]*)*<\/script>/gi, ' ')
100
+ .replace(/<style\b[^<]*(?:(?!<\/style>)<[^<]*)*<\/style>/gi, ' ')
101
+ .replace(/<[^>]+>/g, ' ')
102
+ .replace(/&nbsp;/g, ' ')
103
+ .replace(/&amp;/g, '&')
104
+ .replace(/&lt;/g, '<')
105
+ .replace(/&gt;/g, '>')
106
+ .replace(/&quot;/g, '"')
107
+ .replace(/\s+/g, ' ')
108
+ .trim()
109
+ if (isJsonMode()) return printJson({ url, text })
110
+ console.log(text)
111
+ } else {
112
+ if (isJsonMode()) return printJson({ url, content })
113
+ console.log(content)
114
+ }
115
+ })
116
+
43
117
  program.command('search <query>')
44
118
  .description('Web search — returns titles, URLs, and snippets')
45
119
  .option('--count <n>', 'Number of results to return (max 20)', '5')